Intro

In this script, I load exchange data from DATRAS and calculate the catch of cod and flounder in kg/km^2 (with TVL gear) by size group, correcting for gear dimensions, sweep length and trawl speed, following Orio et al. (2017).

Load libraries

library(tidyverse)
#> Warning: package 'tidyr' was built under R version 4.0.5
library(readxl)
library(tidylog)
library(RCurl)
library(viridis)
library(RColorBrewer)
#> Warning: package 'RColorBrewer' was built under R version 4.0.5
library(patchwork)
library(janitor)
library(icesDatras)
library(mapdata)
library(patchwork)
library(rgdal)
library(raster)
library(sf)
library(rgeos)
library(chron)
library(lattice)
library(ncdf4)
library(marmap)
library(rnaturalearth)
library(rnaturalearthdata)
library(mapplots)
library(geosphere)
library(modelr)

# Country polygons at medium scale, returned as an sf object
world <- ne_countries(returnclass = "sf", scale = "medium")

# Tabulate the share of zero-catch rows.
# .data must contain a numeric column `haul_cpue_kg`. Returns one row per
# value of `zero_catch` ("Y" when haul_cpue_kg == 0, otherwise "N") with the
# row count `n` and its percentage `per` of all rows.
prop_zero <- function(.data) {
  flagged <- mutate(.data, zero_catch = ifelse(haul_cpue_kg == 0, "Y", "N"))
  counts <- summarise(group_by(flagged, zero_catch), n = n())
  mutate(counts, per = prop.table(n) * 100)
}

# Folder holding the helper scripts. The original absolute path is tried first
# so the existing setup keeps working; on any other machine fall back to the
# project-relative folder (this assumes the working directory is the project
# root, as the relative "data/..." paths used elsewhere in this script do).
functions_dir <- "/Users/maxlindmark/Dropbox/Max work/R/cod_interactions/R/functions"
if (!dir.exists(functions_dir)) {
  functions_dir <- file.path("R", "functions")
}

# Source code for map plots
source(file.path(functions_dir, "map_plot.R"))

# Source code for lon lat to utm
source(file.path(functions_dir, "lon_lat_utm.R"))

# theme_plot() is not defined in this script; presumably it comes from one of
# the sourced files above.
theme_set(theme_plot())

# Continuous colors
options(ggplot2.continuous.colour = "viridis")

# Discrete colors
# Replacement for the default discrete colour scale: ColorBrewer "Set1".
# Arguments passed by the caller (e.g. name, labels) are forwarded to
# scale_colour_brewer() instead of being silently discarded.
scale_colour_discrete <- function(...) {
  scale_colour_brewer(..., palette = "Set1")
}

# Replacement for the default discrete fill scale: ColorBrewer "Set1".
# Arguments passed by the caller (e.g. name, labels) are forwarded to
# scale_fill_brewer() instead of being silently discarded.
scale_fill_discrete <- function(...) {
  scale_fill_brewer(..., palette = "Set1")
}

Read data

# Data were read in from getDATRAS on 2022.09.06 and cached as csv files; the
# download calls are kept (commented out) for reference. In every file only
# years after 1992 are kept, to match the covariates.

# HH data
# bits_hh <- getDATRAS(record = "HH", survey = "BITS", years = 1991:2020, quarters = c(1, 4))
# write.csv(bits_hh, "data/DATRAS_exchange/bits_hh.csv")
bits_hh <- filter(read.csv("data/DATRAS_exchange/bits_hh.csv"), Year > 1992)

# HL data
# bits_hl <- getDATRAS(record = "HL", survey = "BITS", years = 1991:2020, quarters = c(1, 4))
# write.csv(bits_hl, "data/DATRAS_exchange/bits_hl.csv")
bits_hl <- filter(read.csv("data/DATRAS_exchange/bits_hl.csv"), Year > 1992)

# CA data
# bits_ca <- getDATRAS(record = "CA", survey = "BITS", years = 1991:2020, quarters = c(1, 4))
# write.csv(bits_ca, "data/DATRAS_exchange/bits_ca.csv")
bits_ca <- filter(read.csv("data/DATRAS_exchange/bits_ca.csv"), Year > 1992)

# Read gear standardization data
# The older file (sweep_9116.csv) used to be read here and was overwritten by
# the read below on the very next line, so it never had any effect. The call
# is kept for reference only.
# sweep <- read.csv("data/from_ale/sweep_9116.csv", sep = ";", dec = ",", fileEncoding = "latin1")
sweep <- read.csv("data/from_ale/sweep_9118_ml.csv", sep = ";", fileEncoding = "latin1")

Standardize catch data

Standardize ships

# Before creating a new ID, make sure that country and ship names use the same
# format: list the ship codes in the sweep file and in the exchange data
sweep$Ship %>% unique() %>% sort()
#>  [1] "26HF" "ATL"  "ATLD" "BAL"  "BALL" "BPE"  "CEV"  "CLP"  "CLV"  "COML"
#> [11] "DAN2" "DANS" "DAR"  "GDY"  "HAF"  "KOH"  "KOOT" "MON"  "MONL" "SOL" 
#> [21] "SOL2" "VSH"  "ZBA"
bits_hh$Ship %>% unique() %>% sort()
#>  [1] "06JR" "06S1" "06SL" "26D4" "26HF" "26HI" "67BC" "77AR" "77MA" "77SE"
#> [11] "AA36" "ESLF" "ESOR" "ESTM" "LAIZ" "LTDA" "RUEK" "RUJB" "RUNT" "RUS6"
bits_hl$Ship %>% unique() %>% sort()
#>  [1] "06JR" "06S1" "06SL" "26D4" "26HF" "26HI" "67BC" "77AR" "77MA" "77SE"
#> [11] "AA36" "ESLF" "ESOR" "ESTM" "LAIZ" "LTDA" "RUEK" "RUJB" "RUNT" "RUS6"

# Change back to the old Ship name standard...
# https://vocab.ices.dk/?ref=315
# https://vocab.ices.dk/?ref=315
# Assumptions:
# SOL is Solea on ICES links above, and SOL1 is the older one of the two SOLs (1 and 2)
# DAN is Dana
# sweep %>% filter(Ship == "DANS") %>% distinct(Year, Country)
# sweep %>% filter(Ship == "DAN2") %>% distinct(Year)
# bits_hh %>% filter(Ship == "67BC") %>% distinct(Year, Country)
# sweep %>% filter(Ship == "DAN2") %>% distinct(Year)
# bits_hh %>% filter(Ship == "26D4") %>% distinct(Year) # Strange that 26DF doesn't extend far back. Which ship did the Danes use? Ok, I have no Danish data that old.
# bits_hh %>% filter(Country == "DK") %>% distinct(Year)

# Mapping from the ship codes in the exchange data (values) to the old ship
# codes used in the sweep file (names).
# The previous version listed "67BC" twice ("HAF" and then "BAL"). fct_recode()
# applies the last mapping for a repeated level, so "67BC" always became "BAL"
# and the "HAF" entry never had any effect; only the effective mapping is kept.
# "77SE" and "AA36" map to themselves, i.e. they are left unchanged.
ship_recode_map <- c(
  "SOL" = "06S1",
  "SOL2" = "06SL",
  "DAN2" = "26D4",
  "HAF" = "26HF",
  "HAF" = "26HI",
  "BAL" = "67BC",
  "ARG" = "77AR",
  "77SE" = "77SE",
  "AA36" = "AA36",
  "KOOT" = "ESLF",
  "KOH" = "ESTM",
  "DAR" = "LTDA",
  "ATLD" = "RUJB",
  "ATL" = "RUNT"
)

# Add the standardized ship columns to one exchange data set.
#   .data: data frame with columns `Ship` and `Country`.
# Returns .data with two extra character columns:
#   Ship2: Ship recoded with ship_recode_map (codes not in the map are kept)
#   Ship3: as Ship2, except that "BAL" hauls with Country "LV" become "BALL"
# Must run while Country still holds the two-letter codes ("LV"), i.e. before
# the country codes are standardized further down.
standardize_ship <- function(.data) {
  .data %>%
    mutate(Ship2 = as.character(fct_recode(Ship, !!!ship_recode_map)),
           Ship3 = ifelse(Country == "LV" & Ship2 == "BAL", "BALL", Ship2))
}

bits_hh <- standardize_ship(bits_hh)
bits_hl <- standardize_ship(bits_hl)
bits_ca <- standardize_ship(bits_ca)

# Ok, which (recoded) ships in the exchange data are missing from the sweep data?
setdiff(unique(bits_hh$Ship3), unique(sweep$Ship))
#> [1] "LAIZ" "AA36" "06JR" "ARG"  "RUEK" "RUS6" "77MA" "77SE" "ESOR"
# Swedish Ships and unidentified ships are NOT in the Sweep data

# And the reverse: which sweep ships are missing from the exchange data?
# The sweep data has no Ship3 column (sweep$Ship3 is NULL), so the earlier
# version of this check printed NULL regardless of the data. Compare on
# sweep$Ship, which uses the same code standard as Ship3.
setdiff(unique(sweep$Ship), unique(bits_hh$Ship3))
# NOTE(review): the earlier conclusion that all sweep ships are present in the
# exchange data came from the vacuous check; re-run and record the real result.

Standardize countries

# Now check which country codes are used
sweep$Country %>% unique() %>% sort()
#> [1] "DEN" "EST" "GFR" "LAT" "LTU" "POL" "RUS" "SWE"
bits_hh$Country %>% unique() %>% sort()
#> [1] "DE" "DK" "EE" "LT" "LV" "PL" "RU" "SE"

# https://www.nationsonline.org/oneworld/country_code_list.htm#E
# Two-letter codes used in the exchange data (values) and the three-letter
# codes used in the sweep file (names)
country_recode_map <- c(
  "DEN" = "DK",
  "EST" = "EE",
  "GFR" = "DE",
  "LAT" = "LV",
  "LTU" = "LT",
  "POL" = "PL",
  "RUS" = "RU",
  "SWE" = "SE"
)

# Overwrite `Country` in .data with the sweep-file code (as character)
standardize_country <- function(.data) {
  mutate(.data, Country = as.character(fct_recode(Country, !!!country_recode_map)))
}

bits_hh <- standardize_country(bits_hh)
bits_hl <- standardize_country(bits_hl)
bits_ca <- standardize_country(bits_ca)

# Gear codes: are they the same across the data sets?
bits_hh$Gear %>% unique() %>% sort()
#>  [1] "DT"  "ESB" "EXP" "FOT" "GOV" "GRT" "H20" "HAK" "LBT" "LPT" "P20" "PEL"
#> [13] "SON" "TVL" "TVS"
bits_hl$Gear %>% unique() %>% sort()
#>  [1] "DT"  "ESB" "EXP" "FOT" "GOV" "GRT" "H20" "HAK" "LBT" "P20" "PEL" "SON"
#> [13] "TVL" "TVS"
sweep$Gear %>% unique() %>% sort()
#>  [1] "CAM" "CHP" "DT"  "EGY" "ESB" "EXP" "GRT" "H20" "HAK" "LBT" "LPT" "P20"
#> [13] "PEL" "SON" "TVL" "TVS"

# Which gears in the HL data are NOT in the sweep data?
setdiff(unique(bits_hl$Gear), unique(sweep$Gear))
#> [1] "GOV" "FOT"

Create a simple haul ID that works across all exchange data

# Create ID column: Year.Quarter.Country.Ship.Gear.StNo.HaulNo, built the same
# way in the CA, HL and HH data (uses the original Ship code, not Ship3)
bits_ca$IDx <- with(bits_ca, paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = "."))

bits_hl$IDx <- with(bits_hl, paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = "."))

bits_hh$IDx <- with(bits_hh, paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = "."))

# Works like a haul-id: every IDx occurs exactly once in the HH data
bits_hh %>% as_tibble() %>% add_count(IDx, name = "n") %>% distinct(n)
#> # A tibble: 1 × 1
#>       n
#>   <int>
#> 1     1

Create the same unique haul-ID in the cpue data that I have in the sweep-file

# haul.id = Year:Quarter:Country:Ship3:Gear:StNo:HaulNo (recoded ship, ":" as
# separator). Only HL and HH get it here.
bits_hl$haul.id <- with(bits_hl, paste(Year, Quarter, Country, Ship3, Gear, StNo, HaulNo, sep = ":"))

bits_hh$haul.id <- with(bits_hh, paste(Year, Quarter, Country, Ship3, Gear, StNo, HaulNo, sep = ":"))

# Every haul.id occurs exactly once in the HH data
bits_hh %>% as_tibble() %>% add_count(haul.id, name = "n") %>% distinct(n)
#> # A tibble: 1 × 1
#>       n
#>   <int>
#> 1     1

Clean DATRAS EXCHANGE data

# Keep only hauls with validity code "A", "N" or "V" (described by the author
# as additional, no oxygen and valid hauls)
# Disabled step kept for reference:
#   filter(!Country == "SWE") %>% # I'll deal with Sweden later...
bits_hh <- filter(bits_hh, HaulVal %in% c("A", "N", "V"))

# Add ICES rectangle, derived from the shoot position
bits_hh$Rect <- mapplots::ices.rect2(lat = bits_hh$ShootLat, lon = bits_hh$ShootLong)

# Add ICES subdivisions by overlaying the shoot positions on the area shapefile
shape <- shapefile("data/ICES_StatRec_mapto_ICES_Areas/StatRec_map_Areas_Full_20170124.shp")

# Points get the same CRS string as the shapefile
shoot_xy <- cbind(bits_hh$ShootLong, bits_hh$ShootLat)
pts <- SpatialPoints(shoot_xy, proj4string = CRS(proj4string(shape)))
#> Warning in proj4string(shape): CRS object has comment, which is lost in output

# One row of shapefile attributes per haul; keep the Area_27 attribute
haul_areas <- over(pts, shape)
bits_hh$sub_div <- haul_areas$Area_27

# Rename subdivisions to the more common names (the filtering by subdivision
# is done further down, not here)
bits_hh$sub_div %>% unique() %>% sort()
#>  [1] "3.a.20"   "3.a.21"   "3.b.23"   "3.c.22"   "3.d.24"   "3.d.25"  
#>  [7] "3.d.26"   "3.d.27"   "3.d.28.1" "3.d.28.2" "3.d.29"

# Area_27 codes (values) and their short subdivision names (names);
# 3.d.28.1 and 3.d.28.2 are both collapsed into "28"
sub_div_map <- c(
  "20" = "3.a.20",
  "21" = "3.a.21",
  "22" = "3.c.22",
  "23" = "3.b.23",
  "24" = "3.d.24",
  "25" = "3.d.25",
  "26" = "3.d.26",
  "27" = "3.d.27",
  "28" = "3.d.28.1",
  "28" = "3.d.28.2",
  "29" = "3.d.29"
)

bits_hh <- bits_hh %>%
  mutate(sub_div = as.character(fct_recode(factor(sub_div), !!!sub_div_map)))

# Now add the fishing line information from the sweep file (we need that later
# to standardize based on gear geometry). We add it to the HH data and then
# transfer it to the other exchange data files when left_joining.
# Lookup of the distinct Gear / Fishing.line combinations in the sweep data
# (ungrouped, so no grouping leaks into later steps):
fishing_line <- sweep %>% distinct(Gear, Fishing.line)

# Join explicitly on Gear. If a gear had more than one Fishing.line value in
# the sweep data, the join would silently duplicate hauls, so warn when the
# number of HH rows changes.
n_hh_before <- nrow(bits_hh)
bits_hh <- left_join(bits_hh, fishing_line, by = "Gear")
if (nrow(bits_hh) != n_hh_before) {
  warning("Joining Fishing.line by Gear changed the number of HH rows (",
          n_hh_before, " -> ", nrow(bits_hh),
          "): some gear has more than one Fishing.line in the sweep data.",
          call. = FALSE)
}
# sweep %>% group_by(Gear) %>% distinct(Fishing.line)
# bits_hh %>% group_by(Gear) %>% distinct(Fishing.line)
bits_hh$Fishing.line <- as.numeric(bits_hh$Fishing.line)

# Which gears do not have a fishing line? Missing values are first replaced
# by the sentinel -9, then the gears carrying that sentinel are listed
bits_hh$Fishing.line <- replace(bits_hh$Fishing.line, is.na(bits_hh$Fishing.line), -9)
distinct(filter(bits_hh, Fishing.line == -9), Gear)
#>   Gear
#> 1  GRT
#> 2  FOT
#> 3  GOV
#> 4   DT
#> 5  LPT
#> 6  ESB
#> 7  EXP
#> 8  HAK
# 1  GRT
# 2  CAM
# 3  EXP
# 4  FOT
# 5  GOV
# 6  EGY
# 7   DT
# 8  ESB
# 9  HAK

# FROM the index files (Orio, "Research Östersjön 2")
# FOT has 83
# GOV has 160
# ESB ??
# GRT ??
# Rest are unknown and likely not used by Swedish data (therefore their correction
# factors may be in the sweep file)

# Add these values: fishing line 83 for FOT and 160 for GOV; all other gears
# keep the value they already have
bits_hh <- bits_hh %>%
  mutate(Fishing.line = ifelse(Gear == "FOT", 83,
                               ifelse(Gear == "GOV", 160, Fishing.line)))

# Keep only the HL rows whose haul.id is present in the (filtered) HH data
bits_hl <- semi_join(bits_hl, bits_hh, by = "haul.id")

# Match columns from the HH data to the HL and CA data: first inspect the
# subdivisions and the column names available in HH
bits_hh$sub_div %>% unique() %>% sort()
#>  [1] "20" "21" "22" "23" "24" "25" "26" "27" "28" "29"
bits_hh %>% colnames() %>% sort()
#>  [1] "BotCurDir"         "BotCurSpeed"       "BotSal"           
#>  [4] "BotTemp"           "Buoyancy"          "BySpecRecCode"    
#>  [7] "CodendMesh"        "Country"           "DataType"         
#> [10] "DateofCalculation" "Day"               "DayNight"         
#> [13] "Depth"             "DepthStratum"      "Distance"         
#> [16] "DoorSpread"        "DoorSurface"       "DoorType"         
#> [19] "DoorWgt"           "Fishing.line"      "Gear"             
#> [22] "GearEx"            "GroundSpeed"       "haul.id"          
#> [25] "HaulDur"           "HaulLat"           "HaulLong"         
#> [28] "HaulNo"            "HaulVal"           "HydroStNo"        
#> [31] "IDx"               "KiteDim"           "MaxTrawlDepth"    
#> [34] "MinTrawlDepth"     "Month"             "Netopening"       
#> [37] "PelSampType"       "Quarter"           "RecordType"       
#> [40] "Rect"              "Rigging"           "SecchiDepth"      
#> [43] "Ship"              "Ship2"             "Ship3"            
#> [46] "ShootLat"          "ShootLong"         "SpeedWater"       
#> [49] "StatRec"           "StdSpecRecCode"    "StNo"             
#> [52] "sub_div"           "SurCurDir"         "SurCurSpeed"      
#> [55] "SurSal"            "SurTemp"           "Survey"           
#> [58] "SweepLngt"         "SwellDir"          "SwellHeight"      
#> [61] "ThClineDepth"      "ThermoCline"       "Tickler"          
#> [64] "TidePhase"         "TideSpeed"         "TimeShot"         
#> [67] "TowDir"            "Turbidity"         "WarpDen"          
#> [70] "Warpdia"           "Warplngt"          "WgtGroundRope"    
#> [73] "WindDir"           "WindSpeed"         "WingSpread"       
#> [76] "X"                 "Year"

# No NAs for the variables going in to the stomach haul ID (a single all-FALSE
# row means none of these columns contains a missing value)
unique(is.na(bits_hh[, c("Year", "Quarter", "Month", "Country", "Rect", "HaulNo")]))
#>       Year Quarter Month Country  Rect HaulNo
#> [1,] FALSE   FALSE FALSE   FALSE FALSE  FALSE

# Before making the id_haul_stomach variable we need to change the country column so that it actually matches the stomach data
# This is the stomach version:
#[1] "LV" "PL" "SE" "DK"
unique(bits_hh$Country)
#> [1] "LAT" "POL" "DEN" "GFR" "SWE" "RUS" "EST" "LTU"

# MAKE SURE THE COUNTRY CODE IS THE SAME! FOR NOW I DON'T USE COUNTRY 2
# Country2 holds the two-letter code for the four countries listed above and
# NA for every other country
stomach_country <- c(LAT = "LV", POL = "PL", SWE = "SE", DEN = "DK")
bits_hh <- bits_hh %>% mutate(Country2 = unname(stomach_country[Country]))

# Haul-level columns to carry over from HH to the HL and CA data
hh_merge_cols <- c("sub_div", "Rect", "HaulVal", "StdSpecRecCode", "BySpecRecCode",
                   "Fishing.line", "Month", "DataType", "HaulDur", "GroundSpeed",
                   "haul.id", "IDx", "ShootLat", "ShootLong", "id_haul_stomach")

# id_haul_stomach is built from the three-letter Country (Country2 is not used)
bits_hh_merge <- bits_hh %>%
  mutate(id_haul_stomach = paste(Year, Quarter, Month, Country, Rect, HaulNo, sep = ".")) %>%
  dplyr::select(all_of(hh_merge_cols))

# HL already has its own haul.id; drop it so the HH version is the only one
bits_hl <- bits_hl %>%
  dplyr::select(-haul.id) %>%
  left_join(bits_hh_merge, by = "IDx")
bits_ca <- bits_ca %>% left_join(bits_hh_merge, by = "IDx")

# Now filter the subdivisions I want (24-28) from all data sets
bits_hh <- filter(bits_hh, sub_div %in% 24:28)
bits_hl <- filter(bits_hl, sub_div %in% 24:28)
bits_ca <- filter(bits_ca, sub_div %in% 24:28)
# Spot check: haul numbers for one year/quarter/month/country/rectangle
bits_hl %>%
  filter(Year == 2016, Quarter == 1, Month == 2, Country == "SWE", Rect == "39G4") %>%
  distinct(HaulNo)
#> filter: removed 820,236 rows (>99%), 144 rows remaining
#> distinct: removed 143 rows (99%), one row remaining
#>   HaulNo
#> 1      1

Filter species

# Cod: HL rows with either of the two species codes, labelled with the
# scientific name
hlcod <- mutate(filter(bits_hl, SpecCode %in% c("126436", "164712")),
                Species = "Gadus morhua")

# Flounder: same approach
hlfle <- mutate(filter(bits_hl, SpecCode %in% c("127141", "172894")),
                Species = "Platichthys flesus")

# Plaice is not included in the stomach study though
# hlpla <- bits_hl %>%
#   filter(SpecCode %in% c("127143", "172902")) %>% 
#   mutate(Species = "Pleuronectes platessa")

Prepare to add 0 catches

# Find common columns in the HH and HL data (here already subset by species)
comcol <- names(hlcod)[names(hlcod) %in% names(bits_hh)]

# What is the proportion of zero-catch hauls?
# Here we don't have zero catches
hlcod %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(HLNoAtLngt), .groups = "drop") %>%
  distinct(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N"))
#> # A tibble: 1 × 1
#>   zero_catch
#>   <chr>     
#> 1 N

# Cod: Add 0s and then remove lines with SpecVal = 0 (first NA because we don't have a match in the HH, then make them 0 later)
# The full join adds one row for every HH haul that has no matching cod row
hlcod0 <- full_join(hlcod, dplyr::select(bits_hh, all_of(comcol)), by = comcol)

# No zeroes yet
hlcod0 %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(HLNoAtLngt), .groups = "drop") %>%
  distinct(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N"))
#> # A tibble: 1 × 1
#>   zero_catch
#>   <lgl>     
#> 1 NA

# Rows that came only from HH have no SpecVal: label them "zeroCatch"
hlcod0$SpecVal <- factor(replace(hlcod0$SpecVal, is.na(hlcod0$SpecVal), "zeroCatch"))

# Remove rows with SpecVal "0"
hlcod0 <- filter(hlcod0, SpecVal != "0")

# Add species again after merge
hlcod0$Species <- "Gadus morhua"

# Flounder: Add 0s, remove them if StdSpecRecCode != 1 and then remove lines with SpecVal = 0
# The full join adds one row (Species = NA) for every HH haul without a
# matching flounder row.
hlfle0 <- full_join(hlfle, bits_hh[, comcol], by = comcol)

# Drop the haul-only rows unless StdSpecRecCode is 1.
# The comparison is made NA-safe with %in%: with the previous
# `StdSpecRecCode != 1`, a haul-only row with a missing StdSpecRecCode gave an
# NA index, and base `[` turns an NA index into a row of all NAs, which would
# then be labelled "zeroCatch" below. Such rows are now dropped, because a
# missing code does not show that the haul qualifies as a zero catch.
haul_only <- is.na(hlfle0$Species)
std_recorded <- hlfle0$StdSpecRecCode %in% 1
hlfle0 <- hlfle0[!(haul_only & !std_recorded), ]

# Remaining haul-only rows have no SpecVal: label them "zeroCatch"
hlfle0$SpecVal[is.na(hlfle0$SpecVal)] <- "zeroCatch"
hlfle0$SpecVal <- factor(hlfle0$SpecVal)

# Remove rows with SpecVal "0"
hlfle0 <- hlfle0 %>% filter(!SpecVal == "0")

# Add species again after merge
hlfle0$Species <- "Platichthys flesus"

# Check number of hauls (distinct haul.id) per species
n_distinct(hlcod0$haul.id)
#> [1] 12254
n_distinct(hlfle0$haul.id)
#> [1] 12016

Create (unstandardized) CPUE for SpecVal=1. If DataType=C then CPUEun=HLNoAtLngt, if DataType=R then CPUEun=HLNoAtLngt/(HaulDur/60), if DataType=S then CPUEun=(HLNoAtLngt*SubFactor)/(HaulDur/60). If SpecVal="zeroCatch" then CPUEun=0, if SpecVal=4 we need to decide (no length measurements, only total catch). Note that here we also add zero CPUE if SpecVal=="zeroCatch".

Then I will sum for the same haul the CPUE of the same length classes if they were sampled with different subfactors or with different sexes.

# Cod
# Unstandardized CPUE per row:
#   SpecVal "zeroCatch"       -> 0
#   SpecVal "1", DataType "C" -> HLNoAtLngt
#   SpecVal "1", DataType "R" -> HLNoAtLngt / (HaulDur / 60)
#   SpecVal "1", DataType "S" -> (HLNoAtLngt * SubFactor) / (HaulDur / 60)
#   anything else             -> NA
hlcod0 <- hlcod0 %>%
  mutate(CPUEun = ifelse(
    SpecVal == "zeroCatch",
    0,
    ifelse(
      SpecVal == "1",
      ifelse(DataType == "C",
             HLNoAtLngt,
             ifelse(DataType == "R",
                    HLNoAtLngt / (HaulDur / 60),
                    ifelse(DataType == "S",
                           (HLNoAtLngt * SubFactor) / (HaulDur / 60),
                           NA))),
      NA)))

# Summed unstandardized CPUE per haul, showing only hauls whose sum is not 0
# (hauls whose sum is NA are dropped by the filter as well)
hlcod0 %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun), .groups = "drop") %>%
  filter(CPUEun_haul != 0)
#> # A tibble: 10,268 × 3
#>    haul.id                     Year CPUEun_haul
#>    <chr>                      <int>       <dbl>
#>  1 1993:1:DEN:DAN2:GRT:105:45  1993          37
#>  2 1993:1:DEN:DAN2:GRT:106:46  1993           2
#>  3 1993:1:DEN:DAN2:GRT:107:47  1993          34
#>  4 1993:1:DEN:DAN2:GRT:108:48  1993           5
#>  5 1993:1:DEN:DAN2:GRT:109:49  1993          24
#>  6 1993:1:DEN:DAN2:GRT:11:6    1993         135
#>  7 1993:1:DEN:DAN2:GRT:110:50  1993          40
#>  8 1993:1:DEN:DAN2:GRT:112:51  1993          32
#>  9 1993:1:DEN:DAN2:GRT:113:52  1993          12
#> 10 1993:1:DEN:DAN2:GRT:114:53  1993          16
#> # … with 10,258 more rows

# Number of hauls per year, with bars filled by whether the haul's summed
# CPUE is zero ("Y") or not ("N")
hlcod0 %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun), .groups = "drop") %>%
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
  count(Year, zero_catch, name = "n") %>%
  ggplot(aes(x = Year, y = n, fill = zero_catch)) +
  geom_col()


# Some combinations of length class and haul id have multiple rows, so we need to sum them up
# (n = number of rows sharing the same LngtClass and haul.id)
hlcod0 %>% as_tibble() %>% add_count(LngtClass, haul.id, name = "n") %>% distinct(n)
#> # A tibble: 2 × 1
#>       n
#>   <int>
#> 1     1
#> 2     2
# First 20 rows belonging to a length class / haul combination that has two rows
hlcod0 %>% as.data.frame() %>% add_count(LngtClass, haul.id, name = "n") %>% filter(n == 2) %>% head(20)
#>         X RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType
#> 1  205495         HL   BITS       1     RUS RUJB  HAK        NA   <NA>       NA
#> 2  300422         HL   BITS       4     EST ESLF  TVS        NA   <NA>       NA
#> 3  300423         HL   BITS       4     EST ESLF  TVS        NA   <NA>       NA
#> 4  325369         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 5  325375         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 6  326062         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 7  326064         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 8  326065         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 9  326066         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 10 326067         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 11 326068         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 12 326069         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 13 326071         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 14 326073         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 15 326077         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 16 326079         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 17 326080         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 18 326082         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 19 326083         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 20 326085         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#>    StNo HaulNo Year SpecCodeType SpecCode SpecVal  Sex TotalNo CatIdentifier
#> 1  <NA>     10 1998            T   164712       4 <NA>      -9             1
#> 2    14     14 2000            T   164712       1    M       2             1
#> 3    14     14 2000            T   164712       1    F       4             1
#> 4    63     31 2001            T   164712       1    M       5             1
#> 5    63     31 2001            T   164712       1    F       7             1
#> 6    83     37 2001            T   164712       1    M      21             1
#> 7    83     37 2001            T   164712       1    M      21             1
#> 8    83     37 2001            T   164712       1    M      21             1
#> 9    83     37 2001            T   164712       1    M      21             1
#> 10   83     37 2001            T   164712       1    M      21             1
#> 11   83     37 2001            T   164712       1    M      21             1
#> 12   83     37 2001            T   164712       1    M      21             1
#> 13   83     37 2001            T   164712       1    M      21             1
#> 14   83     37 2001            T   164712       1    M      21             1
#> 15   83     37 2001            T   164712       1    F      42             1
#> 16   83     37 2001            T   164712       1    F      42             1
#> 17   83     37 2001            T   164712       1    F      42             1
#> 18   83     37 2001            T   164712       1    F      42             1
#> 19   83     37 2001            T   164712       1    F      42             1
#> 20   83     37 2001            T   164712       1    F      42             1
#>    NoMeas SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1      NA         1     NA          NA     <NA>        NA         -9     <NA>
#> 2       3         1     NA          32        1        35          2     <NA>
#> 3       3         1     NA          32        1        35          2     <NA>
#> 4       5         1     NA        3142        1        22          2     <NA>
#> 5       7         1     NA        3142        1        22          1     <NA>
#> 6      21         1     NA       64261        1        22          1     <NA>
#> 7      21         1     NA       64261        1        38          2     <NA>
#> 8      21         1     NA       64261        1        39          2     <NA>
#> 9      21         1     NA       64261        1        41          1     <NA>
#> 10     21         1     NA       64261        1        42          1     <NA>
#> 11     21         1     NA       64261        1        44          2     <NA>
#> 12     21         1     NA       64261        1        45          4     <NA>
#> 13     21         1     NA       64261        1        48          2     <NA>
#> 14     21         1     NA       64261        1        52          1     <NA>
#> 15     42         1     NA       64261        1        22          1     <NA>
#> 16     42         1     NA       64261        1        38          2     <NA>
#> 17     42         1     NA       64261        1        39          2     <NA>
#> 18     42         1     NA       64261        1        41          1     <NA>
#> 19     42         1     NA       64261        1        42          4     <NA>
#> 20     42         1     NA       64261        1        44          3     <NA>
#>    LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1           NA          20140617      126436  ATLD  ATLD
#> 2           NA          20131112      126436  KOOT  KOOT
#> 3           NA          20131112      126436  KOOT  KOOT
#> 4           NA          20131113      126436  DAN2  DAN2
#> 5           NA          20131113      126436  DAN2  DAN2
#> 6           NA          20131113      126436  DAN2  DAN2
#> 7           NA          20131113      126436  DAN2  DAN2
#> 8           NA          20131113      126436  DAN2  DAN2
#> 9           NA          20131113      126436  DAN2  DAN2
#> 10          NA          20131113      126436  DAN2  DAN2
#> 11          NA          20131113      126436  DAN2  DAN2
#> 12          NA          20131113      126436  DAN2  DAN2
#> 13          NA          20131113      126436  DAN2  DAN2
#> 14          NA          20131113      126436  DAN2  DAN2
#> 15          NA          20131113      126436  DAN2  DAN2
#> 16          NA          20131113      126436  DAN2  DAN2
#> 17          NA          20131113      126436  DAN2  DAN2
#> 18          NA          20131113      126436  DAN2  DAN2
#> 19          NA          20131113      126436  DAN2  DAN2
#> 20          NA          20131113      126436  DAN2  DAN2
#>                          IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1  1998.1.RUS.RUJB.HAK.NA.10      26 38G9       V              1             1
#> 2  2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#> 3  2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#> 4  2001.4.DEN.26D4.TVL.63.31      26 39G8       V              1             1
#> 5  2001.4.DEN.26D4.TVL.63.31      26 39G8       V              1             1
#> 6  2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 7  2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 8  2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 9  2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 10 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 11 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 12 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 13 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 14 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 15 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 16 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 17 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 18 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 19 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 20 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#>    Fishing.line Month DataType HaulDur GroundSpeed                   haul.id
#> 1         -9.00     3        C      30         3.8 1998:1:RUS:ATLD:HAK:NA:10
#> 2         33.22    11        C      30         3.0 2000:4:EST:KOOT:TVS:14:14
#> 3         33.22    11        C      30         3.0 2000:4:EST:KOOT:TVS:14:14
#> 4         63.46    11        R      30         3.0 2001:4:DEN:DAN2:TVL:63:31
#> 5         63.46    11        R      30         3.0 2001:4:DEN:DAN2:TVL:63:31
#> 6         63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 7         63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 8         63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 9         63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 10        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 11        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 12        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 13        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 14        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 15        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 16        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 17        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 18        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 19        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 20        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#>    ShootLat ShootLong       id_haul_stomach      Species   CPUEun n
#> 1   54.6333   19.6500  1998.1.3.RUS.38G9.10 Gadus morhua       NA 2
#> 2   58.0167   21.0833 2000.4.11.EST.45H1.14 Gadus morhua 2.000000 2
#> 3   58.0167   21.0833 2000.4.11.EST.45H1.14 Gadus morhua 2.000000 2
#> 4   55.4699   18.3116 2001.4.11.DEN.39G8.31 Gadus morhua 4.000000 2
#> 5   55.4699   18.3116 2001.4.11.DEN.39G8.31 Gadus morhua 2.000000 2
#> 6   55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 1.935484 2
#> 7   55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 3.870968 2
#> 8   55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 3.870968 2
#> 9   55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 1.935484 2
#> 10  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 1.935484 2
#> 11  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 3.870968 2
#> 12  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 7.741935 2
#> 13  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 3.870968 2
#> 14  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 1.935484 2
#> 15  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 1.935484 2
#> 16  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 3.870968 2
#> 17  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 3.870968 2
#> 18  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 1.935484 2
#> 19  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 7.741935 2
#> 20  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 5.806452 2
# Find (length class, haul) combinations that occur exactly twice in the cod
# length data and keep the haul ID of the second matching row for spot checks
test <- hlcod0 %>%
  group_by(LngtClass, haul.id) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  filter(n == 2)
test_id <- test[["haul.id"]][2]

# Collapse to one row per haul and length class: every row first gets the
# summed CPUE of its (haul, length class) group, then only the first row of
# each group is kept (all other columns come from that first row)
hlcodL <- hlcod0 %>%
  group_by(haul.id, LngtClass) %>%
  mutate(CPUEun = sum(CPUEun)) %>%
  ungroup() %>%
  distinct(haul.id, LngtClass, .keep_all = TRUE) %>%
  dplyr::select(-X) # Clean up a bit

# Spot check: all raw rows of the test haul, before duplicates are collapsed
hlcod0 %>% filter(haul.id == test_id)
#>        X RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType
#> 1 300422         HL   BITS       4     EST ESLF  TVS        NA   <NA>       NA
#> 2 300423         HL   BITS       4     EST ESLF  TVS        NA   <NA>       NA
#> 3 300424         HL   BITS       4     EST ESLF  TVS        NA   <NA>       NA
#> 4   4998         HH   BITS       4     EST ESLF  TVS        NA   <NA>       NA
#>   StNo HaulNo Year SpecCodeType SpecCode   SpecVal  Sex TotalNo CatIdentifier
#> 1   14     14 2000            T   164712         1    M       2             1
#> 2   14     14 2000            T   164712         1    F       4             1
#> 3   14     14 2000            T   164712         1    F       4             1
#> 4   14     14 2000         <NA>       NA zeroCatch <NA>      NA            NA
#>   NoMeas SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1      3         1     NA          32        1        35          2     <NA>
#> 2      3         1     NA          32        1        35          2     <NA>
#> 3      3         1     NA          32        1        39          2     <NA>
#> 4     NA        NA     NA          NA     <NA>        NA         NA     <NA>
#>   LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1          NA          20131112      126436  KOOT  KOOT
#> 2          NA          20131112      126436  KOOT  KOOT
#> 3          NA          20131112      126436  KOOT  KOOT
#> 4          NA          20220301          NA  KOOT  KOOT
#>                         IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1 2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#> 2 2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#> 3 2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#> 4 2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#>   Fishing.line Month DataType HaulDur GroundSpeed                   haul.id
#> 1        33.22    11        C      30           3 2000:4:EST:KOOT:TVS:14:14
#> 2        33.22    11        C      30           3 2000:4:EST:KOOT:TVS:14:14
#> 3        33.22    11        C      30           3 2000:4:EST:KOOT:TVS:14:14
#> 4        33.22    11        C      30           3 2000:4:EST:KOOT:TVS:14:14
#>   ShootLat ShootLong       id_haul_stomach      Species CPUEun
#> 1  58.0167   21.0833 2000.4.11.EST.45H1.14 Gadus morhua      2
#> 2  58.0167   21.0833 2000.4.11.EST.45H1.14 Gadus morhua      2
#> 3  58.0167   21.0833 2000.4.11.EST.45H1.14 Gadus morhua      2
#> 4  58.0167   21.0833                  <NA> Gadus morhua      0
as.data.frame(filter(hlcodL, haul.id == test_id))
#>   RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType StNo
#> 1         HL   BITS       4     EST ESLF  TVS        NA   <NA>       NA   14
#> 2         HL   BITS       4     EST ESLF  TVS        NA   <NA>       NA   14
#> 3         HH   BITS       4     EST ESLF  TVS        NA   <NA>       NA   14
#>   HaulNo Year SpecCodeType SpecCode   SpecVal  Sex TotalNo CatIdentifier NoMeas
#> 1     14 2000            T   164712         1    M       2             1      3
#> 2     14 2000            T   164712         1    F       4             1      3
#> 3     14 2000         <NA>       NA zeroCatch <NA>      NA            NA     NA
#>   SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1         1     NA          32        1        35          2     <NA>
#> 2         1     NA          32        1        39          2     <NA>
#> 3        NA     NA          NA     <NA>        NA         NA     <NA>
#>   LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1          NA          20131112      126436  KOOT  KOOT
#> 2          NA          20131112      126436  KOOT  KOOT
#> 3          NA          20220301          NA  KOOT  KOOT
#>                         IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1 2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#> 2 2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#> 3 2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#>   Fishing.line Month DataType HaulDur GroundSpeed                   haul.id
#> 1        33.22    11        C      30           3 2000:4:EST:KOOT:TVS:14:14
#> 2        33.22    11        C      30           3 2000:4:EST:KOOT:TVS:14:14
#> 3        33.22    11        C      30           3 2000:4:EST:KOOT:TVS:14:14
#>   ShootLat ShootLong       id_haul_stomach      Species CPUEun
#> 1  58.0167   21.0833 2000.4.11.EST.45H1.14 Gadus morhua      4
#> 2  58.0167   21.0833 2000.4.11.EST.45H1.14 Gadus morhua      2
#> 3  58.0167   21.0833                  <NA> Gadus morhua      0

# Do we still have 0 catches? Sum CPUE per haul and list the hauls whose
# total is not zero (compare the row count with the total number of hauls)
hlcodL %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>%
  ungroup() %>%
  filter(CPUEun_haul != 0)
#> # A tibble: 10,268 × 3
#>    haul.id                     Year CPUEun_haul
#>    <chr>                      <int>       <dbl>
#>  1 1993:1:DEN:DAN2:GRT:105:45  1993          37
#>  2 1993:1:DEN:DAN2:GRT:106:46  1993           2
#>  3 1993:1:DEN:DAN2:GRT:107:47  1993          34
#>  4 1993:1:DEN:DAN2:GRT:108:48  1993           5
#>  5 1993:1:DEN:DAN2:GRT:109:49  1993          24
#>  6 1993:1:DEN:DAN2:GRT:11:6    1993         135
#>  7 1993:1:DEN:DAN2:GRT:110:50  1993          40
#>  8 1993:1:DEN:DAN2:GRT:112:51  1993          32
#>  9 1993:1:DEN:DAN2:GRT:113:52  1993          12
#> 10 1993:1:DEN:DAN2:GRT:114:53  1993          16
#> # … with 10,258 more rows

# Number of hauls per year, stacked by whether the haul's summed cod CPUE
# is zero ("Y") or not ("N")
hlcodL %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>%
  ungroup() %>%
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
  group_by(Year, zero_catch) %>%
  summarise(n = n()) %>%
  ggplot(aes(x = Year, y = n, fill = zero_catch)) +
  geom_col()


# Flounder
# Unstandardized CPUE in numbers per length-class record, by DataType:
#   "C": HLNoAtLngt is used as is
#   "R": HLNoAtLngt divided by HaulDur/60
#   "S": HLNoAtLngt raised by SubFactor, then divided by HaulDur/60
# zeroCatch records get 0; any other combination (or a missing SpecVal /
# DataType) gives NA
hlfle0 <- hlfle0 %>%
  mutate(
    CPUEun = case_when(
      SpecVal == "1" & DataType == "C" ~ HLNoAtLngt,
      SpecVal == "1" & DataType == "R" ~ HLNoAtLngt / (HaulDur / 60),
      SpecVal == "1" & DataType == "S" ~ (HLNoAtLngt * SubFactor) / (HaulDur / 60),
      SpecVal == "zeroCatch" ~ 0,
      TRUE ~ NA_real_
    )
  )

# Sum up the CPUEs if there are multiple records per length class and haul,
# then keep a single row (the first one) for each haul and length class
hlfleL <- hlfle0 %>%
  group_by(haul.id, LngtClass) %>%
  mutate(CPUEun = sum(CPUEun)) %>%
  ungroup() %>%
  distinct(haul.id, LngtClass, .keep_all = TRUE) %>%
  dplyr::select(-X)

# Flounder hauls whose summed CPUE is not zero
hlfleL %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>%
  ungroup() %>%
  filter(CPUEun_haul != 0)
#> # A tibble: 10,013 × 3
#>    haul.id                     Year CPUEun_haul
#>    <chr>                      <int>       <dbl>
#>  1 1993:1:DEN:DAN2:GRT:105:45  1993        2500
#>  2 1993:1:DEN:DAN2:GRT:106:46  1993           8
#>  3 1993:1:DEN:DAN2:GRT:107:47  1993          26
#>  4 1993:1:DEN:DAN2:GRT:108:48  1993          34
#>  5 1993:1:DEN:DAN2:GRT:109:49  1993           8
#>  6 1993:1:DEN:DAN2:GRT:11:6    1993          11
#>  7 1993:1:DEN:DAN2:GRT:110:50  1993           8
#>  8 1993:1:DEN:DAN2:GRT:114:53  1993           1
#>  9 1993:1:DEN:DAN2:GRT:115:54  1993           3
#> 10 1993:1:DEN:DAN2:GRT:117:56  1993         729
#> # … with 10,003 more rows

Get and add annual weight-length relationships from the CA data for both cod and flounder so that I can calculate CPUE in biomass rather than numbers further down

# Cod
# Keep the cod records of the CA data (both species codes in use), make the
# station number numeric (non-numeric values become NA with a warning) and
# build a haul ID from the survey keys
bits_ca_cod <- bits_ca %>%
  filter(SpecCode %in% c("164712", "126436")) %>%
  mutate(
    StNo = as.numeric(StNo),
    Species = "Cod",
    ID = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = ".")
  )
#> Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

# Now I need to copy rows with NoAtLngt > 1 so that 1 row = 1 ind
# First make a small test
# nrow(bits_ca_cod)
# test_id <- head(filter(bits_ca_cod, CANoAtLngt == 5))$ID[1]
# filter(bits_ca_cod, ID == test_id & CANoAtLngt == 5)

# Repeat every row CANoAtLngt times (each column is passed through rep())
bits_ca_cod <- map_df(bits_ca_cod, rep, bits_ca_cod$CANoAtLngt)

# head(data.frame(filter(bits_ca_cod, ID == test_id & CANoAtLngt == 5)), 20)
# nrow(bits_ca_cod)
# Looks ok!

# Standardize length and drop NA weights (need that for condition)
# DATRAS reports LngtClass in mm for LngtCode "." (1 mm classes) AND "0"
# (0.5 cm classes); all other codes are reported in cm. Both mm codes must
# therefore be divided by 10, otherwise 0.5 cm-class fish end up ten times
# too long (https://vocab.ices.dk/?ref=18)
bits_ca_cod <- bits_ca_cod %>% 
  drop_na(IndWgt) %>% 
  drop_na(LngtClass) %>% 
  filter(IndWgt > 0 & LngtClass > 0) %>%  # Filter positive length and weight
  mutate(weight_kg = IndWgt/1000) %>% 
  # `==` combined with `|` (rather than %in%) keeps length NA when LngtCode is NA
  mutate(length_cm = ifelse(LngtCode == "." | LngtCode == "0", 
                            LngtClass/10,
                            LngtClass))

# Plot length against individual weight, one panel per year
ggplot(data = bits_ca_cod, mapping = aes(x = IndWgt, y = length_cm)) +
  facet_wrap(~Year) +
  geom_point()


# Now extract the coefficients for each year (not bothering with outliers at the moment)
# Fit log(IndWgt) = log(a) + b * log(length_cm) once per year and tidy the
# coefficient tables; both the intercept and the slope table below are taken
# from this single set of fits instead of refitting the same models twice
cod_lw_coefs <- bits_ca_cod %>%
  split(.$Year) %>%
  purrr::map(~lm(log(IndWgt) ~ log(length_cm), data = .x)) %>%
  purrr::map_df(broom::tidy, .id = 'Year') %>%
  mutate(Year = as.integer(Year))

# a: intercept back-transformed from the log scale
cod_intercept <- cod_lw_coefs %>%
  filter(term == "(Intercept)") %>% 
  mutate(a = exp(estimate)) %>% 
  dplyr::select(Year, a)

# b: slope (exponent) of the length-weight relationship
cod_slope <- cod_lw_coefs %>%
  filter(term == "log(length_cm)") %>% 
  rename("b" = "estimate") %>% 
  dplyr::select(Year, b)

# Flounder
# Same preparation as for cod: keep the flounder species codes, make the
# station number numeric (non-numeric values become NA with a warning) and
# build a haul ID from the survey keys
bits_ca_fle <- bits_ca %>%
  filter(SpecCode %in% c("127141", "172894")) %>%
  mutate(
    StNo = as.numeric(StNo),
    Species = "Flounder",
    ID = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = ".")
  )
#> Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

# Repeat every row CANoAtLngt times (each column is passed through rep())
bits_ca_fle <- map_df(bits_ca_fle, rep, bits_ca_fle$CANoAtLngt)

# Standardize length and drop NA weights (need that for condition)
# DATRAS reports LngtClass in mm for LngtCode "." (1 mm classes) AND "0"
# (0.5 cm classes); all other codes are reported in cm. Both mm codes must
# therefore be divided by 10, otherwise 0.5 cm-class fish end up ten times
# too long (https://vocab.ices.dk/?ref=18)
bits_ca_fle <- bits_ca_fle %>% 
  drop_na(IndWgt) %>% 
  drop_na(LngtClass) %>% 
  filter(IndWgt > 0 & LngtClass > 0) %>%  # Filter positive length and weight
  mutate(weight_kg = IndWgt/1000) %>% 
  # `==` combined with `|` (rather than %in%) keeps length NA when LngtCode is NA
  mutate(length_cm = ifelse(LngtCode == "." | LngtCode == "0", 
                            LngtClass/10,
                            LngtClass)) %>%
  # Drop the 2008 records reported with LngtCode "." and any fish of 70 cm or more
  mutate(keep = ifelse(LngtCode == "." & Year == 2008, "N", "Y")) %>%
  filter(keep == "Y") %>% 
  filter(length_cm < 70)

# Plot length against individual weight per year, coloured by length code
ggplot(data = bits_ca_fle,
       mapping = aes(x = IndWgt, y = length_cm, color = LngtCode)) +
  facet_wrap(~Year) +
  geom_point()


# Now extract the coefficients for each year (not bothering with outliers at the moment)
# Fit log(IndWgt) = log(a) + b * log(length_cm) once per year and tidy the
# coefficient tables; both the intercept and the slope table below are taken
# from this single set of fits instead of refitting the same models twice
fle_lw_coefs <- bits_ca_fle %>%
  split(.$Year) %>%
  purrr::map(~lm(log(IndWgt) ~ log(length_cm), data = .x)) %>%
  purrr::map_df(broom::tidy, .id = 'Year') %>%
  mutate(Year = as.integer(Year))

# a: intercept back-transformed from the log scale
fle_intercept <- fle_lw_coefs %>%
  filter(term == "(Intercept)") %>% 
  mutate(a = exp(estimate)) %>% 
  dplyr::select(Year, a)

# b: slope (exponent) of the length-weight relationship
fle_slope <- fle_lw_coefs %>%
  filter(term == "log(length_cm)") %>% 
  rename("b" = "estimate") %>% 
  dplyr::select(Year, b)

Join the annual L-W relationships to the respective catch data to calculate CPUE in biomass not abundance

# Attach the year-specific length-weight coefficients (a and b) to the
# haul-level length data for cod (hlcodL) and flounder (hlfleL)
hlcodL <- hlcodL %>%
  left_join(cod_intercept, by = "Year") %>%
  left_join(cod_slope, by = "Year")

hlfleL <- hlfleL %>%
  left_join(fle_intercept, by = "Year") %>%
  left_join(fle_slope, by = "Year")

Convert from CPUE in numbers to kg

# First standardize length to cm and then check how zero-catches are implemented at this stage
# DATRAS reports LngtClass in mm for LngtCode "." (1 mm classes) AND "0"
# (0.5 cm classes); all other codes are reported in cm. Dividing only "."
# by 10 leaves LngtCode "0" records ten times too long (e.g. LngtClass 335
# read as 335 cm instead of 33.5 cm), which produces unrealistic weights
# (https://vocab.ices.dk/?ref=18)
# `==` combined with `|` (rather than %in%) keeps length_cm NA when LngtCode
# is NA, as it is for zero-catch rows
hlcodL <- hlcodL %>% 
  mutate(length_cm = ifelse(LngtCode == "." | LngtCode == "0", 
                            LngtClass/10,
                            LngtClass))

filter(hlcodL, length_cm == 0) # No such thing
#> # A tibble: 0 × 51
#> # … with 51 variables: RecordType <chr>, Survey <chr>, Quarter <int>,
#> #   Country <chr>, Ship <chr>, Gear <chr>, SweepLngt <int>, GearEx <chr>,
#> #   DoorType <lgl>, StNo <chr>, HaulNo <int>, Year <int>, SpecCodeType <chr>,
#> #   SpecCode <int>, SpecVal <fct>, Sex <chr>, TotalNo <dbl>,
#> #   CatIdentifier <int>, NoMeas <int>, SubFactor <dbl>, SubWgt <int>,
#> #   CatCatchWgt <int>, LngtCode <chr>, LngtClass <int>, HLNoAtLngt <dbl>,
#> #   DevStage <chr>, LenMeasType <int>, DateofCalculation <int>, …

# Are the rows without a length exactly the zero catches? Recode missing
# lengths to -9 so they can be plotted, and facet on whether length is missing
hlcodL %>%
  mutate(length2 = replace_na(length_cm, -9)) %>%
  mutate(no_length = ifelse(length2 < 0, "T", "F")) %>%
  ggplot(aes(x = length2, y = CPUEun, color = no_length)) +
  geom_point(alpha = 0.2) +
  facet_wrap(~no_length)
#> Warning: Removed 1 rows containing missing values (geom_point).


# Which lengths occur among the rows with zero CPUE?
distinct(filter(hlcodL, CPUEun == 0), length_cm)
#> # A tibble: 1 × 1
#>   length_cm
#>       <dbl>
#> 1        NA

# So every zero-catch row has NA length_cm. The three throwaway subsets below
# (rows with a CPUE, rows with zero CPUE, rows with a length) are only made
# to compare row counts; `t` is overwritten each time.
# NOTE(review): the plot above reported one removed row with a missing value,
# so confirm that there really are no NA catches
t <- drop_na(hlcodL, CPUEun)
t <- filter(hlcodL, CPUEun == 0)
t <- drop_na(hlcodL, length_cm)

# A zero catch is thus a row with CPUE 0 and NA length_cm. To get a biomass
# CPUE of 0 (not NA) for those rows, matching the numbers-CPUE, missing
# lengths are set to 0 in a separate column (length_cm2) before converting.
# Weight at length comes from the joined annual coefficients, a * length^b,
# divided by 1000 to go to kg (IndWgt is presumably in grams; confirm);
# biomass CPUE is that weight times the CPUE in numbers
hlcodL <- hlcodL %>%
  mutate(
    length_cm2 = replace_na(length_cm, 0),
    weight_kg = (a * length_cm2^b) / 1000,
    CPUEun_kg = weight_kg * CPUEun
  )

# Plot length against predicted weight per year to check the haul data too
ggplot(data = hlcodL, mapping = aes(x = weight_kg, y = length_cm2)) +
  facet_wrap(~Year) +
  geom_point()


# Hmm, some unrealistic weights actually: list the 50 rows with the largest
# predicted individual weight
head(as.data.frame(arrange(hlcodL, desc(weight_kg))), 50)
#>    RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType  StNo
#> 1          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 2          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 3          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 4          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 5          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 6          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 7          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 8          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 9          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 10         HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 11         HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 12         HL   BITS       1     DEN 26D4  GRT        NA   <NA>       NA    45
#> 13         HL   BITS       1     SWE 77AR  FOT       185   <NA>       NA    80
#> 14         HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 15         HL   BITS       1     DEN 26D4  GRT        60   <NA>       NA    43
#> 16         HL   BITS       1     DEN 26D4  GRT        NA   <NA>       NA     9
#> 17         HL   BITS       1     SWE 77AR  FOT       225   <NA>       NA   191
#> 18         HL   BITS       1     DEN 26D4  TVL        75      S       NA    71
#> 19         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA    46
#> 20         HL   BITS       1     DEN 26D4  GRT        60   <NA>       NA   117
#> 21         HL   BITS       4     SWE 77AR  GOV        75   <NA>       NA   584
#> 22         HL   BITS       1     DEN 26D4  GRT        NA   <NA>       NA    75
#> 23         HL   BITS       4     DEN 26D4  TVL        75      S       NA    10
#> 24         HL   BITS       1     DEN 26D4  GRT        NA      S       NA   136
#> 25         HL   BITS       4     SWE 77AR  TVL        75   <NA>       NA   573
#> 26         HL   BITS       1     SWE 77AR  GOV       100   <NA>       NA   220
#> 27         HL   BITS       1     SWE 77AR  GOV       100   <NA>       NA   237
#> 28         HL   BITS       1     DEN 26D4  GRT        NA   <NA>       NA   111
#> 29         HL   BITS       4     SWE 77AR  FOT       225   <NA>       NA   231
#> 30         HL   BITS       4     SWE 77AR  FOT       203   <NA>       NA   247
#> 31         HL   BITS       1     SWE 77AR  FOT       225   <NA>       NA   207
#> 32         HL   BITS       4     SWE 77AR  GOV       100   <NA>       NA   652
#> 33         HL   BITS       1     DEN 26D4  GRT        NA   <NA>       NA    53
#> 34         HL   BITS       1     DEN 26D4  GRT        NA   <NA>       NA    57
#> 35         HL   BITS       1     RUS RUJB  TVL        NA   <NA>       NA  <NA>
#> 36         HL   BITS       1     GFR 06SL  TVS        NA   <NA>       NA 24316
#> 37         HL   BITS       1     DEN 26D4  GRT       110   <NA>       NA   162
#> 38         HL   BITS       1     DEN 26D4  GRT        60   <NA>       NA    74
#> 39         HL   BITS       1     SWE 77AR  FOT       225   <NA>       NA   222
#> 40         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA    44
#> 41         HL   BITS       1     DEN 26D4  TVL        NA      S     TRUE    95
#> 42         HL   BITS       1     DEN 26D4  TVL        75      S       NA     6
#> 43         HL   BITS       1     SWE 77AR  FOT       185   <NA>       NA    58
#> 44         HL   BITS       4     POL 67BC  TVL        75      S       NA 26132
#> 45         HL   BITS       1     RUS RUJB  HAK        NA   <NA>       NA  <NA>
#> 46         HL   BITS       1     RUS RUJB  HAK        NA   <NA>       NA  <NA>
#> 47         HL   BITS       1     DEN 26D4  TVL        NA      S     TRUE    19
#> 48         HL   BITS       1     DEN 26D4  GRT        60   <NA>       NA    17
#> 49         HL   BITS       4     SWE 77AR  FOT       180   <NA>       NA   252
#> 50         HL   BITS       4     DEN 26D4  TVL        75      S       NA    94
#>    HaulNo Year SpecCodeType SpecCode SpecVal  Sex TotalNo CatIdentifier NoMeas
#> 1      40 1998            T   164712       1 <NA>      17             1     17
#> 2      40 1998            T   164712       1 <NA>      17             1     17
#> 3      40 1998            T   164712       1 <NA>      17             1     17
#> 4      40 1998            T   164712       1 <NA>      17             1     17
#> 5      40 1998            T   164712       1 <NA>      17             1     17
#> 6      40 1998            T   164712       1 <NA>      17             1     17
#> 7      40 1998            T   164712       1 <NA>      17             1     17
#> 8      40 1998            T   164712       1 <NA>      17             1     17
#> 9      40 1998            T   164712       1 <NA>      17             1     17
#> 10     40 1998            T   164712       1 <NA>      17             1     17
#> 11     40 1998            T   164712       1 <NA>      17             1     17
#> 12     25 1996            W   126436       1 <NA>      93             1     93
#> 13     32 1993            W   126436       1 <NA>     454             1    454
#> 14     40 1998            T   164712       1 <NA>      17             1     17
#> 15     22 1994            T   164712       1 <NA>      37             1     37
#> 16      5 1996            W   126436       1 <NA>     406             1    406
#> 17      3 1998            T   164712       1 <NA>     546             1    273
#> 18     36 2003            T   164712       1 <NA>     255             1    255
#> 19     20 1993            W   126436       1 <NA>     326             1    163
#> 20     50 1994            T   164712       1 <NA>     775             1    775
#> 21     18 2000            W   126436       1 <NA>     454             1    227
#> 22     40 1996            W   126436       1 <NA>      29             1     29
#> 23      4 2002            T   164712       1 <NA>    1958             1   1958
#> 24     64 1998            T   164712       1 <NA>     130             1    130
#> 25     11 2002            W   126436       1 <NA>    3090             1   1545
#> 26     21 2000            W   126436       1 <NA>    1076             1    538
#> 27     34 2000            W   126436       1 <NA>     638             1    319
#> 28     47 1996            W   126436       1 <NA>      78             1     78
#> 29     26 1996            W   126436       1 <NA>      54             1     27
#> 30      2 1995            W   126436       1 <NA>    2736             1   1368
#> 31     15 1998            T   164712       1 <NA>      20             1     10
#> 32     17 1999            T   164712       1 <NA>    1484             1    742
#> 33     30 1996            W   126436       1 <NA>     197             1    197
#> 34     23 1995            W   126436       1 <NA>     223             1    223
#> 35     13 2006            W   126436       1 <NA>      24             1     12
#> 36     41 2008            T   164712       1 <NA>     864             1    864
#> 37     72 1993            W   126436       1 <NA>     128             1    128
#> 38     39 1994            T   164712       1 <NA>     457             1    457
#> 39     26 1998            T   164712       1 <NA>     126             1     63
#> 40     24 1994            T   164712       1 <NA>     260             1    130
#> 41     66 2005            W   126436       1 <NA>     764             1    764
#> 42      3 2001            T   164712       1 <NA>     132             1    132
#> 43     10 1993            W   126436       1 <NA>      85             1     85
#> 44     24 2008            T   164712       1 <NA>    1696             1    848
#> 45     35 1998            T   164712       1 <NA>    1214             1    390
#> 46     33 1998            T   164712       1 <NA>     302             1    151
#> 47     17 2009            W   126436       1 <NA>     972             1    972
#> 48      9 1993            W   126436       1 <NA>     281             1    277
#> 49     21 1993            W   126436       1 <NA>    5932             1   1499
#> 50     46 2000            T   164712       1 <NA>     180             1    180
#>    SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1          1     NA       13275        0       335          1     <NA>
#> 2          1     NA       13275        0       285          1     <NA>
#> 3          1     NA       13275        0       225          2     <NA>
#> 4          1     NA       13275        0       220          1     <NA>
#> 5          1     NA       13275        0       215          1     <NA>
#> 6          1     NA       13275        0       180          1     <NA>
#> 7          1     NA       13275        0       175          1     <NA>
#> 8          1     NA       13275        0       160          1     <NA>
#> 9          1     NA       13275        0       150          2     <NA>
#> 10         1     NA       13275        0       145          3     <NA>
#> 11         1     NA       13275        0       140          1     <NA>
#> 12         1 113900      113900        .      1360          1     <NA>
#> 13         1     NA      234500        1       127          1     <NA>
#> 14         1     NA       13275        0       130          2     <NA>
#> 15         1     NA        1033        1       127          1     <NA>
#> 16         1 257280      257280        .      1230          1     <NA>
#> 17         1     NA      484399        1       121          2     <NA>
#> 18         1     NA      158200        1       121          1     <NA>
#> 19         1     NA        2190        1       116          2     <NA>
#> 20         1     NA        5718        1       118          1     <NA>
#> 21         1     NA      268580        1       118          2     <NA>
#> 22         1  48300       48300        .      1170          1     <NA>
#> 23         1     NA     1040200        1       118          1     <NA>
#> 24         1     NA      157570        1       116          1     <NA>
#> 25         1     NA     1302400        1       117          2     <NA>
#> 26         1     NA      303000        1       116          2     <NA>
#> 27         1     NA      158200        1       115          2     <NA>
#> 28         1 119000      119000        .      1130          1     <NA>
#> 29         1     NA       96200        1       113          2     <NA>
#> 30         1     NA      607800        1       111          2     <NA>
#> 31         1     NA       76000        1       113          2     <NA>
#> 32         1     NA      267459        1       113          2     <NA>
#> 33         1 192900      192900        .      1120          1     <NA>
#> 34         1 149100      149100        .      1100          1     <NA>
#> 35         1     NA       39544        1       118          2     <NA>
#> 36         1     NA      247830        1       119          1     <NA>
#> 37         1     NA        1079        1       109          1     <NA>
#> 38         1     NA          42        1       112          1     <NA>
#> 39         1     NA       48400        1       111          2     <NA>
#> 40         1     NA        2374        1       111          2     <NA>
#> 41         1 139400      139400        .      1150          1     <NA>
#> 42         1     NA       61000        1       111          1     <NA>
#> 43         1     NA       46000        1       107          1     <NA>
#> 44         1     NA      968860        1       116          2     <NA>
#> 45         1     NA       16278        1       109          3     <NA>
#> 46         1     NA        2962        1       109          2     <NA>
#> 47         1 322476      322476        .      1130          1     <NA>
#> 48         1     NA        1297        1       106          1     <NA>
#> 49         1     NA     5688600        1       106          3     <NA>
#> 50         1     NA       46605        1       109          1     <NA>
#>    LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1           NA          20140617      126436  DAN2  DAN2
#> 2           NA          20140617      126436  DAN2  DAN2
#> 3           NA          20140617      126436  DAN2  DAN2
#> 4           NA          20140617      126436  DAN2  DAN2
#> 5           NA          20140617      126436  DAN2  DAN2
#> 6           NA          20140617      126436  DAN2  DAN2
#> 7           NA          20140617      126436  DAN2  DAN2
#> 8           NA          20140617      126436  DAN2  DAN2
#> 9           NA          20140617      126436  DAN2  DAN2
#> 10          NA          20140617      126436  DAN2  DAN2
#> 11          NA          20140617      126436  DAN2  DAN2
#> 12          NA          20190208      126436  DAN2  DAN2
#> 13          NA          20211203      126436   ARG   ARG
#> 14          NA          20140617      126436  DAN2  DAN2
#> 15          NA          20161213      126436  DAN2  DAN2
#> 16          NA          20190208      126436  DAN2  DAN2
#> 17          NA          20140617      126436   ARG   ARG
#> 18          NA          20131108      126436  DAN2  DAN2
#> 19          NA          20211203      126436   SOL   SOL
#> 20          NA          20161213      126436  DAN2  DAN2
#> 21          NA          20131112      126436   ARG   ARG
#> 22          NA          20190208      126436  DAN2  DAN2
#> 23          NA          20131113      126436  DAN2  DAN2
#> 24          NA          20140617      126436  DAN2  DAN2
#> 25          NA          20131113      126436   ARG   ARG
#> 26          NA          20190228      126436   ARG   ARG
#> 27          NA          20190228      126436   ARG   ARG
#> 28          NA          20190208      126436  DAN2  DAN2
#> 29          NA          20161115      126436   ARG   ARG
#> 30          NA          20161115      126436   ARG   ARG
#> 31          NA          20140617      126436   ARG   ARG
#> 32          NA          20131112      126436   ARG   ARG
#> 33          NA          20190208      126436  DAN2  DAN2
#> 34          NA          20190207      126436  DAN2  DAN2
#> 35          NA          20200115      126436  ATLD  ATLD
#> 36          NA          20180423      126436  SOL2  SOL2
#> 37          NA          20211203      126436  DAN2  DAN2
#> 38          NA          20161213      126436  DAN2  DAN2
#> 39          NA          20140617      126436   ARG   ARG
#> 40          NA          20161213      126436   SOL   SOL
#> 41          NA          20131204      126436  DAN2  DAN2
#> 42          NA          20160714      126436  DAN2  DAN2
#> 43          NA          20211203      126436   ARG   ARG
#> 44          NA          20180507      126436   BAL   BAL
#> 45          NA          20140617      126436  ATLD  ATLD
#> 46          NA          20140617      126436  ATLD  ATLD
#> 47          NA          20131216      126436  DAN2  DAN2
#> 48          NA          20211203      126436  DAN2  DAN2
#> 49          NA          20131111      126436   ARG   ARG
#> 50          NA          20131112      126436  DAN2  DAN2
#>                             IDx sub_div Rect HaulVal StdSpecRecCode
#> 1     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 2     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 3     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 4     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 5     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 6     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 7     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 8     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 9     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 10    1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 11    1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 12    1996.1.DEN.26D4.GRT.45.25      26 41H0       V              1
#> 13    1993.1.SWE.77AR.FOT.80.32      25 39G5       V              1
#> 14    1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 15    1994.1.DEN.26D4.GRT.43.22      26 41H0       V              1
#> 16      1996.1.DEN.26D4.GRT.9.5      25 38G5       V              1
#> 17    1998.1.SWE.77AR.FOT.191.3      25 40G5       V              1
#> 18    2003.1.DEN.26D4.TVL.71.36      25 39G5       V              1
#> 19    1993.1.GFR.06S1.H20.46.20      24 39G4       V              1
#> 20   1994.1.DEN.26D4.GRT.117.50      25 38G5       V              1
#> 21   2000.4.SWE.77AR.GOV.584.18      25 40G5       V              1
#> 22    1996.1.DEN.26D4.GRT.75.40      25 39G6       V              1
#> 23     2002.4.DEN.26D4.TVL.10.4      24 39G4       V              1
#> 24   1998.1.DEN.26D4.GRT.136.64      26 39G8       V              1
#> 25   2002.4.SWE.77AR.TVL.573.11      25 41G7       V              1
#> 26   2000.1.SWE.77AR.GOV.220.21      24 39G3       V              1
#> 27   2000.1.SWE.77AR.GOV.237.34      25 40G5       V              1
#> 28   1996.1.DEN.26D4.GRT.111.47      25 39G5       V              1
#> 29   1996.4.SWE.77AR.FOT.231.26      28 43G8       V              1
#> 30    1995.4.SWE.77AR.FOT.247.2      25 40G5       V              1
#> 31   1998.1.SWE.77AR.FOT.207.15      25 40G7       V              1
#> 32   1999.4.SWE.77AR.GOV.652.17      25 40G5       V              1
#> 33    1996.1.DEN.26D4.GRT.53.30      26 41G9       V              1
#> 34    1995.1.DEN.26D4.GRT.57.23      26 41G9       V              1
#> 35    2006.1.RUS.RUJB.TVL.NA.13      26 39H0       V              1
#> 36 2008.1.GFR.06SL.TVS.24316.41      24 39G4       V              1
#> 37   1993.1.DEN.26D4.GRT.162.72      25 38G5       V              1
#> 38    1994.1.DEN.26D4.GRT.74.39      26 41G8       V              1
#> 39   1998.1.SWE.77AR.FOT.222.26      28 43H0       V              1
#> 40    1994.1.GFR.06S1.H20.44.24      24 39G3       V              1
#> 41    2005.1.DEN.26D4.TVL.95.66      25 39G5       V              1
#> 42      2001.1.DEN.26D4.TVL.6.3      24 39G3       V              1
#> 43    1993.1.SWE.77AR.FOT.58.10      25 41G7       V              1
#> 44 2008.4.POL.67BC.TVL.26132.24      26 38G8       V              3
#> 45    1998.1.RUS.RUJB.HAK.NA.35      26 40G9       V              1
#> 46    1998.1.RUS.RUJB.HAK.NA.33      26 40G9       V              1
#> 47    2009.1.DEN.26D4.TVL.19.17      24 39G4       V              1
#> 48     1993.1.DEN.26D4.GRT.17.9      24 39G4       V              1
#> 49   1993.4.SWE.77AR.FOT.252.21      25 41G7       V              1
#> 50    2000.4.DEN.26D4.TVL.94.46      25 39G6       V              1
#>    BySpecRecCode Fishing.line Month DataType HaulDur GroundSpeed
#> 1              1        -9.00     3        R      60         3.0
#> 2              1        -9.00     3        R      60         3.0
#> 3              1        -9.00     3        R      60         3.0
#> 4              1        -9.00     3        R      60         3.0
#> 5              1        -9.00     3        R      60         3.0
#> 6              1        -9.00     3        R      60         3.0
#> 7              1        -9.00     3        R      60         3.0
#> 8              1        -9.00     3        R      60         3.0
#> 9              1        -9.00     3        R      60         3.0
#> 10             1        -9.00     3        R      60         3.0
#> 11             1        -9.00     3        R      60         3.0
#> 12             1        -9.00     2        R      60         3.4
#> 13             1        83.00     3        C      60         3.0
#> 14             1        -9.00     3        R      60         3.0
#> 15             1        -9.00     3        C      60        -9.0
#> 16             1        -9.00     2        R      60         3.6
#> 17             1        83.00     2        C      30         3.3
#> 18             1        63.46     3        R      30         3.0
#> 19             1        36.00     2        C      30         4.0
#> 20             1        -9.00     3        C      60        -9.0
#> 21             1       160.00    11        C      30         3.7
#> 22             1        -9.00     3        R      60         3.2
#> 23             1        63.46    11        R      30         3.1
#> 24             1        -9.00     3        R      60         3.3
#> 25             1        63.46    11        C      30         3.0
#> 26             1       160.00     3        C      30         3.5
#> 27             1       160.00     3        C      30         3.5
#> 28             1        -9.00     3        R      60         3.5
#> 29             1        83.00    12        C      30         3.3
#> 30             1        83.00    12        C      30         3.7
#> 31             1        83.00     2        C      30         3.3
#> 32             1       160.00    11        C      30         3.3
#> 33             1        -9.00     3        R      60         3.1
#> 34             1        -9.00     2        R      60        -9.0
#> 35             1        63.46     3        C      30         3.0
#> 36             1        33.22     3        R      30         3.2
#> 37             1        -9.00     3        C      60        -9.0
#> 38             1        -9.00     3        C      60        -9.0
#> 39             1        83.00     3        C      30         3.4
#> 40             1        36.00     2        C      30         3.8
#> 41             1        63.46     3        R      30         3.0
#> 42             1        63.46     2        R      30         3.0
#> 43             1        83.00     3        C      60         3.5
#> 44             0        63.46    11        C      30         3.0
#> 45             1        -9.00     3        C      30         3.6
#> 46             1        -9.00     3        C      30         3.6
#> 47             1        63.46     3        R      30         2.9
#> 48             1        -9.00     3        C      59        -9.0
#> 49             1        83.00    11        C      30         3.0
#> 50             1        63.46    11        R      31         2.9
#>                         haul.id ShootLat ShootLong       id_haul_stomach
#> 1     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 2     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 3     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 4     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 5     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 6     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 7     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 8     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 9     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 10    1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 11    1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 12    1996:1:DEN:DAN2:GRT:45:25  56.3700   20.0666  1996.1.2.DEN.41H0.25
#> 13     1993:1:SWE:ARG:FOT:80:32  55.4120   15.3205  1993.1.3.SWE.39G5.32
#> 14    1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 15    1994:1:DEN:DAN2:GRT:43:22  56.4500   20.2000  1994.1.3.DEN.41H0.22
#> 16      1996:1:DEN:DAN2:GRT:9:5  54.5750   15.3200   1996.1.2.DEN.38G5.5
#> 17     1998:1:SWE:ARG:FOT:191:3  55.8218   15.4220   1998.1.2.SWE.40G5.3
#> 18    2003:1:DEN:DAN2:TVL:71:36  55.3709   15.5140  2003.1.3.DEN.39G5.36
#> 19     1993:1:GFR:SOL:H20:46:20  55.1333   14.1667  1993.1.2.GFR.39G4.20
#> 20   1994:1:DEN:DAN2:GRT:117:50  54.7833   15.9500  1994.1.3.DEN.38G5.50
#> 21    2000:4:SWE:ARG:GOV:584:18  55.8150   15.3917 2000.4.11.SWE.40G5.18
#> 22    1996:1:DEN:DAN2:GRT:75:40  55.1983   16.4916  1996.1.3.DEN.39G6.40
#> 23     2002:4:DEN:DAN2:TVL:10:4  55.4908   14.6121  2002.4.11.DEN.39G4.4
#> 24   1998:1:DEN:DAN2:GRT:136:64  55.4800   18.4683  1998.1.3.DEN.39G8.64
#> 25    2002:4:SWE:ARG:TVL:573:11  56.1460   17.7615 2002.4.11.SWE.41G7.11
#> 26    2000:1:SWE:ARG:GOV:220:21  55.0100   13.9300  2000.1.3.SWE.39G3.21
#> 27    2000:1:SWE:ARG:GOV:237:34  55.8467   15.5717  2000.1.3.SWE.40G5.34
#> 28   1996:1:DEN:DAN2:GRT:111:47  55.2150   15.6083  1996.1.3.DEN.39G5.47
#> 29    1996:4:SWE:ARG:FOT:231:26  57.0833   18.9000 1996.4.12.SWE.43G8.26
#> 30     1995:4:SWE:ARG:FOT:247:2  55.8323   15.5427  1995.4.12.SWE.40G5.2
#> 31    1998:1:SWE:ARG:FOT:207:15  55.9271   17.1108  1998.1.2.SWE.40G7.15
#> 32    1999:4:SWE:ARG:GOV:652:17  55.8400   15.5600 1999.4.11.SWE.40G5.17
#> 33    1996:1:DEN:DAN2:GRT:53:30  56.2566   19.5666  1996.1.3.DEN.41G9.30
#> 34    1995:1:DEN:DAN2:GRT:57:23  56.3666   19.8666  1995.1.2.DEN.41G9.23
#> 35    2006:1:RUS:ATLD:TVL:NA:13  55.1583   20.0267  2006.1.3.RUS.39H0.13
#> 36 2008:1:GFR:SOL2:TVS:24316:41  55.1505   14.0718  2008.1.3.GFR.39G4.41
#> 37   1993:1:DEN:DAN2:GRT:162:72  54.8500   15.6667  1993.1.3.DEN.38G5.72
#> 38    1994:1:DEN:DAN2:GRT:74:39  56.1167   18.2667  1994.1.3.DEN.41G8.39
#> 39    1998:1:SWE:ARG:FOT:222:26  57.2520   20.7436  1998.1.3.SWE.43H0.26
#> 40     1994:1:GFR:SOL:H20:44:24  55.0333   13.4333  1994.1.2.GFR.39G3.24
#> 41    2005:1:DEN:DAN2:TVL:95:66  55.0966   15.2680  2005.1.3.DEN.39G5.66
#> 42      2001:1:DEN:DAN2:TVL:6:3  55.0214   13.7642   2001.1.2.DEN.39G3.3
#> 43     1993:1:SWE:ARG:FOT:58:10  56.0352   17.7188  1993.1.3.SWE.41G7.10
#> 44  2008:4:POL:BAL:TVL:26132:24  54.5400   18.8867 2008.4.11.POL.38G8.24
#> 45    1998:1:RUS:ATLD:HAK:NA:35  55.5167   19.8833  1998.1.3.RUS.40G9.35
#> 46    1998:1:RUS:ATLD:HAK:NA:33  55.6167   19.6500  1998.1.3.RUS.40G9.33
#> 47    2009:1:DEN:DAN2:TVL:19:17  55.3195   14.9859  2009.1.3.DEN.39G4.17
#> 48     1993:1:DEN:DAN2:GRT:17:9  55.0333   14.2167   1993.1.3.DEN.39G4.9
#> 49    1993:4:SWE:ARG:FOT:252:21  56.1133   17.5917 1993.4.11.SWE.41G7.21
#> 50    2000:4:DEN:DAN2:TVL:94:46  55.1058   16.3850 2000.4.11.DEN.39G6.46
#>         Species   CPUEun           a        b length_cm length_cm2 weight_kg
#> 1  Gadus morhua 1.000000 0.007345618 3.082202       335        335 445.37563
#> 2  Gadus morhua 1.000000 0.007345618 3.082202       285        285 270.61760
#> 3  Gadus morhua 2.000000 0.007345618 3.082202       225        225 130.59602
#> 4  Gadus morhua 1.000000 0.007345618 3.082202       220        220 121.85635
#> 5  Gadus morhua 1.000000 0.007345618 3.082202       215        215 113.52063
#> 6  Gadus morhua 1.000000 0.007345618 3.082202       180        180  65.64985
#> 7  Gadus morhua 1.000000 0.007345618 3.082202       175        175  60.19004
#> 8  Gadus morhua 1.000000 0.007345618 3.082202       160        160  45.66372
#> 9  Gadus morhua 2.000000 0.007345618 3.082202       150        150  37.42667
#> 10 Gadus morhua 3.000000 0.007345618 3.082202       145        145  33.71329
#> 11 Gadus morhua 1.000000 0.007345618 3.082202       140        140  30.25718
#> 12 Gadus morhua 1.000000 0.008380383 3.055535       136        136  27.69283
#> 13 Gadus morhua 1.000000 0.007244481 3.100800       127        127  24.18146
#> 14 Gadus morhua 2.000000 0.007345618 3.082202       130        130  24.07846
#> 15 Gadus morhua 1.000000 0.008339286 3.052078       127        127  21.98379
#> 16 Gadus morhua 1.000000 0.008380383 3.055535       123        123  20.37242
#> 17 Gadus morhua 2.000000 0.007345618 3.082202       121        121  19.30161
#> 18 Gadus morhua 2.000000 0.007444717 3.070310       121        121  18.47755
#> 19 Gadus morhua 2.000000 0.007244481 3.100800       116        116  18.25909
#> 20 Gadus morhua 1.000000 0.008339286 3.052078       118        118  17.56609
#> 21 Gadus morhua 2.000000 0.007554303 3.071838       118        118  17.48561
#> 22 Gadus morhua 1.000000 0.008380383 3.055535       117        117  17.48553
#> 23 Gadus morhua 2.000000 0.007243730 3.076256       118        118  17.12383
#> 24 Gadus morhua 1.000000 0.007345618 3.082202       116        116  16.94747
#> 25 Gadus morhua 2.000000 0.007243730 3.076256       117        117  16.68132
#> 26 Gadus morhua 2.000000 0.007554303 3.071838       116        116  16.59110
#> 27 Gadus morhua 2.000000 0.007554303 3.071838       115        115  16.15566
#> 28 Gadus morhua 1.000000 0.008380383 3.055535       113        113  15.72235
#> 29 Gadus morhua 2.000000 0.008380383 3.055535       113        113  15.72235
#> 30 Gadus morhua 2.000000 0.007216770 3.098570       111        111  15.70067
#> 31 Gadus morhua 2.000000 0.007345618 3.082202       113        113  15.63258
#> 32 Gadus morhua 2.000000 0.006831731 3.097311       113        113  15.61542
#> 33 Gadus morhua 1.000000 0.008380383 3.055535       112        112  15.30107
#> 34 Gadus morhua 1.000000 0.007216770 3.098570       110        110  15.26652
#> 35 Gadus morhua 2.000000 0.009208543 3.001584       118        118  15.24467
#> 36 Gadus morhua 2.000000 0.009197813 2.994628       119        119  15.10690
#> 37 Gadus morhua 1.000000 0.007244481 3.100800       109        109  15.05428
#> 38 Gadus morhua 1.000000 0.008339286 3.052078       112        112  14.97969
#> 39 Gadus morhua 2.000000 0.007345618 3.082202       111        111  14.79541
#> 40 Gadus morhua 2.000000 0.008339286 3.052078       111        111  14.57521
#> 41 Gadus morhua 2.000000 0.008377248 3.027632       115        115  14.52564
#> 42 Gadus morhua 2.000000 0.007693063 3.064660       111        111  14.26658
#> 43 Gadus morhua 1.000000 0.007244481 3.100800       107        107  14.21416
#> 44 Gadus morhua 2.000000 0.009197813 2.994628       116        116  13.99484
#> 45 Gadus morhua 3.000000 0.007345618 3.082202       109        109  13.98905
#> 46 Gadus morhua 2.000000 0.007345618 3.082202       109        109  13.98905
#> 47 Gadus morhua 2.000000 0.007746254 3.044723       113        113  13.80850
#> 48 Gadus morhua 1.000000 0.007244481 3.100800       106        106  13.80627
#> 49 Gadus morhua 3.000000 0.007244481 3.100800       106        106  13.80627
#> 50 Gadus morhua 1.935484 0.007554303 3.071838       109        109  13.70373
#>    CPUEun_kg
#> 1  445.37563
#> 2  270.61760
#> 3  261.19205
#> 4  121.85635
#> 5  113.52063
#> 6   65.64985
#> 7   60.19004
#> 8   45.66372
#> 9   74.85333
#> 10 101.13986
#> 11  30.25718
#> 12  27.69283
#> 13  24.18146
#> 14  48.15692
#> 15  21.98379
#> 16  20.37242
#> 17  38.60322
#> 18  36.95509
#> 19  36.51819
#> 20  17.56609
#> 21  34.97121
#> 22  17.48553
#> 23  34.24765
#> 24  16.94747
#> 25  33.36265
#> 26  33.18221
#> 27  32.31132
#> 28  15.72235
#> 29  31.44471
#> 30  31.40135
#> 31  31.26517
#> 32  31.23085
#> 33  15.30107
#> 34  15.26652
#> 35  30.48935
#> 36  30.21379
#> 37  15.05428
#> 38  14.97969
#> 39  29.59081
#> 40  29.15042
#> 41  29.05128
#> 42  28.53316
#> 43  14.21416
#> 44  27.98968
#> 45  41.96716
#> 46  27.97811
#> 47  27.61701
#> 48  13.80627
#> 49  41.41880
#> 50  26.52336
# Drop implausible cod records: keep rows with a predicted individual weight
# below 100 kg and a length (length_cm2) below 135
hlcodL <- hlcodL %>%
  filter(weight_kg < 100, length_cm2 < 135)

# Re-plot weight against length by year to confirm the outliers are gone
hlcodL %>%
  ggplot(aes(x = weight_kg, y = length_cm2)) +
  geom_point() +
  facet_wrap(~Year)


# Now do the same for flounder
# First standardize length to cm, then check how zero-catches are implemented at this stage
hlfleL <- hlfleL %>%
  mutate(
    # Length classes with code "." or "0" are divided by 10; all other codes
    # are used as reported (https://vocab.ices.dk/?ref=18)
    length_cm = ifelse(LngtCode %in% c(".", "0"), LngtClass / 10, LngtClass)
  )

# Are there rows with a length of exactly zero? No such thing
hlfleL %>% filter(length_cm == 0)
#> # A tibble: 0 × 51
#> # … with 51 variables: RecordType <chr>, Survey <chr>, Quarter <int>,
#> #   Country <chr>, Ship <chr>, Gear <chr>, SweepLngt <int>, GearEx <chr>,
#> #   DoorType <lgl>, StNo <chr>, HaulNo <int>, Year <int>, SpecCodeType <chr>,
#> #   SpecCode <int>, SpecVal <fct>, Sex <chr>, TotalNo <dbl>,
#> #   CatIdentifier <int>, NoMeas <int>, SubFactor <dbl>, SubWgt <int>,
#> #   CatCatchWgt <int>, LngtCode <chr>, LngtClass <int>, HLNoAtLngt <dbl>,
#> #   DevStage <chr>, LenMeasType <int>, DateofCalculation <int>, …

# Clean the flounder individual-level (CA) records
# NOTE(review): only LngtCode "." is divided by 10 here, whereas the haul data
# (hlfleL) also converts code "0" - confirm that this difference is intended
bits_ca_fle <- bits_ca_fle %>%
  drop_na(IndWgt, LngtClass) %>%
  filter(IndWgt > 0, LngtClass > 0) %>% # Keep positive lengths and weights only
  mutate(
    weight_kg = IndWgt / 1000,
    # Standardize length (https://vocab.ices.dk/?ref=18)
    length_cm = ifelse(LngtCode == ".", LngtClass / 10, LngtClass),
    # Flag 2008 records with length code "." for removal
    keep = ifelse(LngtCode == "." & Year == 2008, "N", "Y")
  ) %>%
  filter(keep == "Y", length_cm < 70)

# Now check if all rows where length is NA are the ones with zero catch!
# Missing lengths are recoded to -9 so they show up in their own panel
hlfleL %>%
  mutate(
    length2 = replace_na(length_cm, -9),
    no_length = ifelse(length2 < 0, "T", "F")
  ) %>%
  ggplot(aes(x = length2, y = CPUEun, color = no_length)) +
  geom_point(alpha = 0.2) +
  facet_wrap(~no_length)
#> Warning: Removed 13 rows containing missing values (geom_point).


# Distinct numbers-CPUE values per length (missing length recoded to -9), smallest first
hlfleL %>%
  mutate(length2 = replace_na(length_cm, -9)) %>%
  group_by(length2) %>%
  distinct(CPUEun) %>%
  arrange(CPUEun)
#> # A tibble: 12,344 × 2
#> # Groups:   length2 [253]
#>    CPUEun length2
#>     <dbl>   <dbl>
#>  1  0        -9  
#>  2  0.667    19  
#>  3  0.667    21  
#>  4  0.667    35  
#>  5  0.667    39  
#>  6  0.667    40  
#>  7  0.667    42  
#>  8  0.870    27  
#>  9  0.870    32  
#> 10  0.870    37.5
#> # … with 12,334 more rows

# Right, so all hauls with zero catch have NA length_cm. Are there any NA catches?
# (tidylog reports how many rows each step removes)
t <- hlfleL %>%
  drop_na(CPUEun)
# Well, 11 rows. I will remove them
hlfleL <- hlfleL %>%
  drop_na(CPUEun)
# Which lengths occur among the zero-catch rows?
t <- hlfleL %>%
  filter(CPUEun == 0) %>%
  distinct(length_cm)
# How many rows lack a length?
t <- hlfleL %>%
  drop_na(length_cm)

# In other words, a zero catch is a row where the catch is zero and length_cm is NA.
# To avoid NA biomass-CPUE for those rows (it should be 0, like the numbers-CPUE),
# NA lengths are replaced with 0 in length_cm2 before the biomass is calculated.
# Weight then follows from the length-weight parameters a and b, and the
# biomass-CPUE is the individual weight times the numbers-CPUE
hlfleL <- hlfleL %>%
  mutate(
    length_cm2 = replace_na(length_cm, 0),
    weight_kg = (a * length_cm2^b) / 1000,
    CPUEun_kg = weight_kg * CPUEun
  )

# Plot and check it's correct also in this data
hlfleL %>%
  ggplot(aes(x = weight_kg, y = length_cm2)) +
  geom_point() +
  facet_wrap(~Year)


# Check (tidylog reports the number of rows removed by each step)
# Should not have any NA in biomass-catch
t <- hlfleL %>%
  drop_na(CPUEun_kg)
# Should result in a few percent of rows (note this is not proportion of hauls, but rows)
t <- hlfleL %>%
  filter(CPUEun_kg == 0)
# Should be no NA
t <- hlfleL %>%
  drop_na(length_cm2)

# Which cod hauls are zero-catch hauls? Sum the numbers-CPUE per haul and keep
# the hauls where the total is zero (summarise() already returns an ungrouped
# tibble when there is a single grouping variable)
hlcodL %>%
  group_by(haul.id) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>%
  filter(CPUEun_haul == 0)
#> # A tibble: 1,985 × 2
#>    haul.id                  CPUEun_haul
#>    <chr>                          <dbl>
#>  1 1993:1:DEN:DAN2:GRT:1:1            0
#>  2 1993:1:DEN:DAN2:GRT:3:2            0
#>  3 1993:1:GFR:SOL:H20:60:42           0
#>  4 1993:1:LAT:LAIZ:LBT:1:1            0
#>  5 1993:1:LAT:LAIZ:LBT:1:10           0
#>  6 1993:1:LAT:LAIZ:LBT:1:11           0
#>  7 1993:1:LAT:LAIZ:LBT:1:12           0
#>  8 1993:1:LAT:LAIZ:LBT:1:13           0
#>  9 1993:1:LAT:LAIZ:LBT:1:14           0
#> 10 1993:1:LAT:LAIZ:LBT:1:15           0
#> # … with 1,975 more rows
  
# Proportion of zero-catch hauls by year and quarter (cod)
cod_0plot <- hlcodL %>%
  group_by(haul.id, Year, Quarter) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>% # Total numbers-CPUE per haul
  ungroup() %>%
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
  group_by(Year, Quarter, zero_catch) %>%
  summarise(n = n()) %>% # Number of hauls in each class
  ungroup() %>%
  # A year-quarter that lacks one of the two classes must get a count of 0 for
  # it. Otherwise the count is NA, the proportion becomes NA, and the bar is
  # dropped from the plot (even when every haul was a zero-catch haul)
  pivot_wider(names_from = zero_catch, values_from = n,
              values_fill = list(n = 0L)) %>%
  mutate(prop_zero_catch_hauls = Y / (N + Y)) %>%
  ggplot(., aes(Year, prop_zero_catch_hauls)) +
  geom_bar(stat = "identity") +
  coord_cartesian(expand = FALSE, ylim = c(0, 1)) +
  facet_wrap(~ Quarter) +
  ggtitle("Cod")

# How many zero-catch hauls? Sum the numbers-CPUE per haul and keep the hauls
# where the total is zero (summarise() already returns an ungrouped tibble when
# there is a single grouping variable)
hlfleL %>%
  group_by(haul.id) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>%
  filter(CPUEun_haul == 0)
#> # A tibble: 1,990 × 2
#>    haul.id                    CPUEun_haul
#>    <chr>                            <dbl>
#>  1 1993:1:DEN:DAN2:GRT:1:1              0
#>  2 1993:1:DEN:DAN2:GRT:112:51           0
#>  3 1993:1:DEN:DAN2:GRT:113:52           0
#>  4 1993:1:DEN:DAN2:GRT:116:55           0
#>  5 1993:1:DEN:DAN2:GRT:136:62           0
#>  6 1993:1:DEN:DAN2:GRT:3:2              0
#>  7 1993:1:DEN:DAN2:GRT:54:25            0
#>  8 1993:1:DEN:DAN2:GRT:68:29            0
#>  9 1993:1:DEN:DAN2:GRT:70:30            0
#> 10 1993:1:GFR:SOL:H20:54:60             0
#> # … with 1,980 more rows

# Proportion of zero-catch hauls by year and quarter (flounder)
fle_0plot <- hlfleL %>%
  group_by(haul.id, Year, Quarter) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>% # Total numbers-CPUE per haul
  ungroup() %>%
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
  group_by(Year, Quarter, zero_catch) %>%
  summarise(n = n()) %>% # Number of hauls in each class
  ungroup() %>%
  # A year-quarter that lacks one of the two classes must get a count of 0 for
  # it. Otherwise the count is NA, the proportion becomes NA, and the bar is
  # dropped from the plot (even when every haul was a zero-catch haul)
  pivot_wider(names_from = zero_catch, values_from = n,
              values_fill = list(n = 0L)) %>%
  mutate(prop_zero_catch_hauls = Y / (N + Y)) %>%
  ggplot(., aes(Year, prop_zero_catch_hauls)) +
  geom_bar(stat = "identity") +
  coord_cartesian(expand = FALSE, ylim = c(0, 1)) +
  facet_wrap(~ Quarter) +
  ggtitle("Flounder")

# Stack the cod panel on top of the flounder panel (patchwork `/` operator)
cod_0plot / fle_0plot
#> Warning: Removed 12 rows containing missing values (position_stack).
#> Warning: Removed 12 rows containing missing values (position_stack).

Standardize according to Orio

To get the unit: kg of fish caught by trawling for 1 h over a standard bottom swept area of 0.45 km² using a TVL trawl with 75 m sweeps at the standard speed of three knots.

# Remove hauls done with the TVL gear with a SweepLngt < 50 (these are calibration hauls, pers. com. Anders & Ale)
# And also hauls without length-information
# Remove pelagic gear
# A missing SweepLngt is treated as 50 in the comparison, so such rows are not
# dropped by the sweep-length rule
drop_calibration_and_pelagic <- function(d) {
  d %>%
    filter(!(Gear == "TVL" & replace_na(SweepLngt, 50) < 50)) %>%
    filter(Gear != "PEL")
}

hlcodL <- drop_calibration_and_pelagic(hlcodL)

hlfleL <- drop_calibration_and_pelagic(hlfleL)

# Add in RS and RSA-values from the sweep file
# CPUE should be multiplied with RS and RSA to standardize to a relative speed and gear dimension.
# There is no single file with all RS and RSA values. Instead they come in three files:
# - sweep (non-Swedish hauls between 1991-2016)
# - + calculated based on trawl speed and gear dimensions.
# I will join in the RS and RSA values from all sources, then standardize and filter
# away non-standardized hauls
# sort(unique(sweep$Year))
# sort(unique(sweep$Country))

# Since I don't have the sweep data for Swedish data, I have to calculate it from scratch using the 
# equation in Orio's spreadsheet

# First I will join in the sweep data,
# NOTE(review): the "ï..haul.id" column name looks like a byte-order-mark
# artefact from reading the sweep file - confirm and consider fixing it at read time
sweep_sel <- sweep %>% rename("haul.id" = "ï..haul.id") %>% dplyr::select(haul.id, RSA, RS)

# Join the sweep-file correction factors onto the catch data by haul id and
# store them as numeric RS_sweep / RSA_sweep columns. The join key is given
# explicitly so the join cannot silently pick up other shared column names
join_sweep_factors <- function(d, sweep_factors) {
  d %>%
    left_join(sweep_factors, by = "haul.id") %>%
    rename("RS_sweep" = "RS",
           "RSA_sweep" = "RSA") %>%
    mutate(RS_sweep = as.numeric(RS_sweep),
           RSA_sweep = as.numeric(RSA_sweep))
}

hlcodL2 <- join_sweep_factors(hlcodL, sweep_sel)
hlfleL2 <- join_sweep_factors(hlfleL, sweep_sel)

# List the columns of the joined cod data in alphabetical order
sort(colnames(hlcodL2))
#>  [1] "a"                 "b"                 "BySpecRecCode"    
#>  [4] "CatCatchWgt"       "CatIdentifier"     "Country"          
#>  [7] "CPUEun"            "CPUEun_kg"         "DataType"         
#> [10] "DateofCalculation" "DevStage"          "DoorType"         
#> [13] "Fishing.line"      "Gear"              "GearEx"           
#> [16] "GroundSpeed"       "haul.id"           "HaulDur"          
#> [19] "HaulNo"            "HaulVal"           "HLNoAtLngt"       
#> [22] "id_haul_stomach"   "IDx"               "length_cm"        
#> [25] "length_cm2"        "LenMeasType"       "LngtClass"        
#> [28] "LngtCode"          "Month"             "NoMeas"           
#> [31] "Quarter"           "RecordType"        "Rect"             
#> [34] "RS_sweep"          "RSA_sweep"         "Sex"              
#> [37] "Ship"              "Ship2"             "Ship3"            
#> [40] "ShootLat"          "ShootLong"         "SpecCode"         
#> [43] "SpecCodeType"      "Species"           "SpecVal"          
#> [46] "StdSpecRecCode"    "StNo"              "sub_div"          
#> [49] "SubFactor"         "SubWgt"            "Survey"           
#> [52] "SweepLngt"         "TotalNo"           "Valid_Aphia"      
#> [55] "weight_kg"         "Year"
# List the columns of the joined flounder data in alphabetical order
sort(colnames(hlfleL2))
#>  [1] "a"                 "b"                 "BySpecRecCode"    
#>  [4] "CatCatchWgt"       "CatIdentifier"     "Country"          
#>  [7] "CPUEun"            "CPUEun_kg"         "DataType"         
#> [10] "DateofCalculation" "DevStage"          "DoorType"         
#> [13] "Fishing.line"      "Gear"              "GearEx"           
#> [16] "GroundSpeed"       "haul.id"           "HaulDur"          
#> [19] "HaulNo"            "HaulVal"           "HLNoAtLngt"       
#> [22] "id_haul_stomach"   "IDx"               "length_cm"        
#> [25] "length_cm2"        "LenMeasType"       "LngtClass"        
#> [28] "LngtCode"          "Month"             "NoMeas"           
#> [31] "Quarter"           "RecordType"        "Rect"             
#> [34] "RS_sweep"          "RSA_sweep"         "Sex"              
#> [37] "Ship"              "Ship2"             "Ship3"            
#> [40] "ShootLat"          "ShootLong"         "SpecCode"         
#> [43] "SpecCodeType"      "Species"           "SpecVal"          
#> [46] "StdSpecRecCode"    "StNo"              "sub_div"          
#> [49] "SubFactor"         "SubWgt"            "Survey"           
#> [52] "SweepLngt"         "TotalNo"           "Valid_Aphia"      
#> [55] "weight_kg"         "Year"

# I will calculate a RS and RSA column in the catch data based on Ale's equation in the sweep file:
# First inspect the unique values of the three input variables in the cod data
# Trawl speed (note the -9 value in the output)
sort(unique(hlcodL2$GroundSpeed))
#>  [1] -9.0  0.1  0.2  0.8  1.7  1.8  2.0  2.1  2.2  2.3  2.4  2.5  2.6  2.7  2.8
#> [16]  2.9  3.0  3.1  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9  4.0  4.1  4.2  4.3
#> [31]  4.4  4.5  4.6  4.7  4.9  5.0  5.2  5.3  5.4  5.5  5.6  5.7  5.9  6.0  6.1
#> [46]  6.2  6.3  6.6  6.7  6.8  6.9  7.1  7.3  8.6
# Fishing line (also contains -9)
sort(unique(hlcodL2$Fishing.line))
#> [1]  -9.00  28.00  33.22  36.00  39.80  63.46  83.00 160.00
# Sweep length
sort(unique(hlcodL2$SweepLngt))
#>  [1]   0  40  50  60  75  87  90  95 100 110 135 180 185 200 203 225 235

# First replace -9 in the columns I use for the calculations with NA so I don't end up with real numbers that are wrong!
hlcodL2 <- hlcodL2 %>%
  mutate(across(c(GroundSpeed, Fishing.line, SweepLngt), ~ na_if(.x, -9)))

hlfleL2 <- hlfleL2 %>%
  mutate(across(c(GroundSpeed, Fishing.line, SweepLngt), ~ na_if(.x, -9)))

# Distinct combinations of the three gear variables in quarter 1 (first 50 shown)
hlcodL2 %>%
  filter(Quarter == 1) %>%
  distinct(GroundSpeed, Fishing.line, SweepLngt) %>%
  as.data.frame() %>%
  head(50)
#>    SweepLngt Fishing.line GroundSpeed
#> 1         60           NA          NA
#> 2        110           NA          NA
#> 3         NA         36.0         3.8
#> 4         NA         36.0         3.6
#> 5         NA         36.0         4.0
#> 6         NA         36.0         4.6
#> 7         NA         36.0         3.4
#> 8         NA         36.0         4.2
#> 9         NA         36.0         3.2
#> 10        NA         36.0         2.6
#> 11        NA         36.0         2.8
#> 12        50        160.0         3.1
#> 13        50        160.0         2.9
#> 14        50        160.0         3.0
#> 15        50        160.0         2.6
#> 16        50        160.0         3.2
#> 17       185         83.0         3.1
#> 18       185         83.0         3.0
#> 19        NA         39.8          NA
#> 20       185         83.0         2.8
#> 21       185         83.0         3.4
#> 22       185         83.0         3.3
#> 23       185         83.0         3.5
#> 24       185         83.0         3.2
#> 25       185         83.0         2.7
#> 26       185         83.0         2.9
#> 27        NA         36.0         4.4
#> 28        NA         36.0         3.0
#> 29        NA         36.0         2.4
#> 30       180         83.0         3.1
#> 31       180         83.0         3.2
#> 32       180         83.0         3.0
#> 33       180         83.0         3.3
#> 34        50        160.0         3.4
#> 35       180         83.0         3.4
#> 36       180         83.0         3.5
#> 37       180         83.0         2.8
#> 38       180         83.0         3.9
#> 39        50        160.0         3.3
#> 40        NA           NA          NA
#> 41        NA         36.0         3.7
#> 42        NA         36.0         3.1
#> 43        NA           NA         3.2
#> 44        NA           NA         3.3
#> 45        NA           NA         3.6
#> 46        NA           NA         3.4
#> 47        90        160.0         3.4
#> 48        NA           NA         3.0
#> 49        NA           NA         3.8
#> 50        NA           NA         3.9

# Distinct combinations of the three gear variables in quarter 4 (first 50 shown)
hlcodL2 %>%
  filter(Quarter == 4) %>%
  distinct(GroundSpeed, Fishing.line, SweepLngt) %>%
  as.data.frame() %>%
  head(50)
#>    SweepLngt Fishing.line GroundSpeed
#> 1        180           83         2.9
#> 2         NA           36         3.8
#> 3        180           83         2.8
#> 4        180           83         3.1
#> 5         NA           36         3.4
#> 6         NA           36         4.0
#> 7         NA           36         4.2
#> 8        180           83         3.2
#> 9        180           83         3.3
#> 10        NA           36         3.2
#> 11        NA           36         3.6
#> 12       180           83         3.0
#> 13        NA           36         3.0
#> 14       185           83         2.8
#> 15       185           83         3.0
#> 16       185           83         3.2
#> 17       185           83         3.3
#> 18       185           83         3.1
#> 19       185           83         3.5
#> 20       185           83         3.4
#> 21       185           83         3.6
#> 22        NA           36          NA
#> 23       203           83         3.6
#> 24       203           83         3.7
#> 25       203           83         3.5
#> 26       203           83         3.8
#> 27       203           83         3.9
#> 28       203           83         3.4
#> 29       100          160         3.4
#> 30       100           83         3.5
#> 31       100           83         3.4
#> 32       225           83         3.6
#> 33       225           83         3.5
#> 34       100           83         4.0
#> 35       100          160         3.5
#> 36       100           83         3.7
#> 37       100           83         3.3
#> 38       100           83         3.2
#> 39       100           83         3.6
#> 40       225           83         3.3
#> 41       225           83         3.4
#> 42       180           83         3.6
#> 43        NA           28          NA
#> 44       225           83         3.2
#> 45       180           83         3.4
#> 46       180           83         3.5
#> 47       225           83         3.7
#> 48       225           83         3.8
#> 49       185           83         3.8
#> 50       225           83         3.9

# Hmm, Q1 has at least one of the RS or RSA variables as NAs. Will be difficult to standardize!
# Hope the correction factors are present in Ales conversion data

# Now calculate correction factors
# Adds RS_x (3 divided by the trawl speed) and RSA_x (reference swept area
# divided by the haul's swept area), together with the intermediate
# gear-dimension columns, using the same equation for both species
add_correction_factors <- function(d) {
  d %>%
    mutate(
      RS_x = 3 / GroundSpeed,
      Horizontal.opening..m. = Fishing.line * 0.67,
      Swep.one.side..after.formula...meter = 0.258819045 * SweepLngt, # SIN(RADIANS(15))
      Size.final..m = Horizontal.opening..m. + (Swep.one.side..after.formula...meter * 2),
      Swept.area = (Size.final..m * 3 * 1860) / 1000000,
      RSA_x = 0.45388309675081 / Swept.area
    )
}

hlcodL2 <- add_correction_factors(hlcodL2)

hlfleL2 <- add_correction_factors(hlfleL2)

# Verify the swept-area equation: recompute RSA inside the sweep file from its
# own gear dimensions (columns suffixed with "2") and compare with the RSA
# value already stored in that file
sweep <- mutate(
  sweep,
  Horizontal.opening..m.2 = 0.67 * Fishing.line,
  # 0.258819045 = SIN(RADIANS(15))
  Swep.one.side..after.formula...meter2 = SweepLngt * 0.258819045,
  Size.final..m2 = (2 * Swep.one.side..after.formula...meter2) + Horizontal.opening..m.2,
  Swept.area2 = (Size.final..m2 * 3 * 1860) / 1e6,
  RSA_x = 0.45388309675081 / Swept.area2
)

# Points on the 1:1 line mean the recalculated and stored values agree
sweep %>%
  drop_na() %>%
  ggplot(aes(x = as.numeric(RSA), y = RSA_x)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1)

# Yes it's the same

# Recode missing correction factors as -1/3. ICES codes missing values as -9,
# which the calculation above turns into -1/3, so using the same value for NAs
# lets all missing factors be filtered out later with a single "> 0" condition.
# sort(unique(hlcodL2$RS_x))
# sort(unique(hlcodL2$RSA_x))

# Cod
hlcodL2$RS_x <- replace(hlcodL2$RS_x, is.na(hlcodL2$RS_x), -1/3)
hlcodL2$RS_sweep <- replace(hlcodL2$RS_sweep, is.na(hlcodL2$RS_sweep), -1/3)
hlcodL2$RSA_x <- replace(hlcodL2$RSA_x, is.na(hlcodL2$RSA_x), -1/3)
hlcodL2$RSA_sweep <- replace(hlcodL2$RSA_sweep, is.na(hlcodL2$RSA_sweep), -1/3)

# Flounder
hlfleL2$RS_x <- replace(hlfleL2$RS_x, is.na(hlfleL2$RS_x), -1/3)
hlfleL2$RS_sweep <- replace(hlfleL2$RS_sweep, is.na(hlfleL2$RS_sweep), -1/3)
hlfleL2$RSA_x <- replace(hlfleL2$RSA_x, is.na(hlfleL2$RSA_x), -1/3)
hlfleL2$RSA_sweep <- replace(hlfleL2$RSA_sweep, is.na(hlfleL2$RSA_sweep), -1/3)

# Cod: compare the different correction factors (calculated from the haul data,
# "_x", vs imported from the sweep file, "_sweep")
p1 <- hlcodL2 %>%
  filter(RS_x > 0) %>%
  ggplot(aes(x = RS_x)) +
  geom_histogram() +
  xlim(0.4, 1.76)
p2 <- ggplot(data = hlcodL2, mapping = aes(x = RSA_x)) +
  geom_histogram()
p3 <- ggplot(data = hlcodL2, mapping = aes(x = RS_sweep)) +
  geom_histogram()
p4 <- ggplot(data = hlcodL2, mapping = aes(x = RSA_sweep)) +
  geom_histogram()

# Calculated factors on the top row, imported factors on the bottom row
(p1 + p2) / (p3 + p4)
#> Warning: Removed 284 rows containing non-finite values (stat_bin).
#> Warning: Removed 2 rows containing missing values (geom_bar).


# Flounder: same comparison of calculated ("_x") vs imported ("_sweep")
# correction factors
p5 <- hlfleL2 %>%
  filter(RS_x > 0) %>%
  ggplot(aes(x = RS_x)) +
  geom_histogram() +
  xlim(0.4, 1.76)
p6 <- ggplot(data = hlfleL2, mapping = aes(x = RSA_x)) +
  geom_histogram()
p7 <- ggplot(data = hlfleL2, mapping = aes(x = RS_sweep)) +
  geom_histogram()
p8 <- ggplot(data = hlfleL2, mapping = aes(x = RSA_sweep)) +
  geom_histogram()

# Calculated factors on the top row, imported factors on the bottom row
(p5 + p6) / (p7 + p8)
#> Warning: Removed 115 rows containing non-finite values (stat_bin).
#> Removed 2 rows containing missing values (geom_bar).


# Why are some RSA values smaller than one? Either the sweeps are longer or the
# gear is larger (GOV). The equation itself was confirmed against the sweep file
# above, so look at which gears and sweep lengths produce 0 < RSA_x < 1.
hlcodL2 %>%
  filter(RSA_x < 1 & RSA_x > 0) %>%
  dplyr::select(
    Year, Country, Ship, Gear, haul.id,
    Horizontal.opening..m., Fishing.line,
    Swep.one.side..after.formula...meter, SweepLngt,
    Size.final..m, Swept.area, RSA_x
  ) %>%
  ggplot(aes(x = RSA_x, fill = factor(SweepLngt))) +
  geom_histogram() +
  facet_wrap(~Gear, ncol = 1)
#> Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
#> Returning the palette you asked for with that many colors


# Do hauls carry more than one distinct RS or RSA value, or is it always
# "either calculated or imported"? Keep rows where both versions are positive
# (i.e. non-missing) and plot them against each other with a red 1:1 line.

# Cod, RS
hlcodL2 %>%
  filter(RS_x > 0 & RS_sweep > 0) %>%
  ggplot(aes(x = RS_x, y = RS_sweep)) +
  geom_point() +
  geom_abline(aes(intercept = 0, slope = 1), color = "red")


# Cod, RSA
hlcodL2 %>%
  filter(RSA_x > 0 & RSA_sweep > 0) %>%
  ggplot(aes(x = RSA_x, y = RSA_sweep)) +
  geom_point() +
  geom_abline(aes(intercept = 0, slope = 1), color = "red")


# Flounder, RS
hlfleL2 %>%
  filter(RS_x > 0 & RS_sweep > 0) %>%
  ggplot(aes(x = RS_x, y = RS_sweep)) +
  geom_point() +
  geom_abline(aes(intercept = 0, slope = 1), color = "red")


# Flounder, RSA
hlfleL2 %>%
  filter(RSA_x > 0 & RSA_sweep > 0) %>%
  ggplot(aes(x = RSA_x, y = RSA_sweep)) +
  geom_point() +
  geom_abline(aes(intercept = 0, slope = 1), color = "red")


# There is one odd RS_x that is larger than 3: that haul caught nothing and its
# recorded speed is 0.8. Drop it from both species' data.
hlcodL2 <- filter(hlcodL2, RS_x < 3)
hlfleL2 <- filter(hlfleL2, RS_x < 3)

# Redo the RS comparison now that the outlier is gone

# Cod
hlcodL2 %>%
  filter(RS_x > 0 & RS_sweep > 0) %>%
  ggplot(aes(x = RS_x, y = RS_sweep)) +
  geom_point() +
  geom_abline(aes(intercept = 0, slope = 1), color = "red")


# Flounder
hlfleL2 %>%
  filter(RS_x > 0 & RS_sweep > 0) %>%
  ggplot(aes(x = RS_x, y = RS_sweep)) +
  geom_point() +
  geom_abline(aes(intercept = 0, slope = 1), color = "red")


# The calculated and imported factors largely agree where both exist. Where
# they differ the imported (sweep-file) value takes precedence.
# Collapse them into a single RS and a single RSA column.

# Cod
hlcodL3 <- hlcodL2 %>%
  # Prefer RS_sweep, fall back on RS_x, otherwise flag with -99. Missing values
  # were recoded to -1/3 above, so "> 0" means "present".
  mutate(RS = ifelse(RS_sweep > 0,
                     RS_sweep,
                     ifelse(RS_x > 0, RS_x, -99))) %>%
  # Same precedence for the swept-area factor
  mutate(RSA = ifelse(RSA_sweep > 0,
                      RSA_sweep,
                      ifelse(RSA_x > 0, RSA_x, -99))) %>%
  # Keep only hauls for which both factors are available
  filter(RS > 0) %>%
  filter(RSA > 0) %>%
  mutate(RSRSA = RS * RSA)

# Distribution of the combined correction factor for cod
ggplot(data = hlcodL3, mapping = aes(x = RSRSA)) +
  geom_histogram()


# Years with Latvian (LAT) flounder records before the correction-factor filtering
hlfleL2 %>%
  filter(Country == "LAT") %>%
  distinct(Year) %>%
  arrange(Year)
#> # A tibble: 28 × 1
#>     Year
#>    <int>
#>  1  1993
#>  2  1994
#>  3  1995
#>  4  1996
#>  5  1997
#>  6  1998
#>  7  1999
#>  8  2000
#>  9  2001
#> 10  2002
#> # … with 18 more rows

# Flounder
hlfleL3 <- hlfleL2 %>%
  # Prefer RS_sweep, otherwise take RS_x. A missing RS_x is coded -1/3 and is
  # therefore removed by the "> 0" filter below.
  mutate(RS = ifelse(RS_sweep > 0, RS_sweep, RS_x)) %>%
  # Same precedence for the swept-area factor
  mutate(RSA = ifelse(RSA_sweep > 0, RSA_sweep, RSA_x)) %>%
  # Keep only hauls for which both factors are available
  filter(RS > 0) %>%
  filter(RSA > 0) %>%
  mutate(RSRSA = RS * RSA)

# Test how many years of LAT data I miss out on because I can't standardize it.
# hlfleL2 %>%
#   mutate(RS = -999,
#          RS = ifelse(RS_sweep > 0, RS_sweep, RS),
#          RS = ifelse(RS < 0, RS_x, RS)) %>% # Note that there are no NA i RS_x. This replaces all non-RS_sweep values -0.3, so I can filter positive later
#   filter(RS > 0) %>% 
#   filter(Country == "LAT") %>% 
#   distinct(Year) %>% 
#   arrange(Year)
#   
# hlfleL2 %>%
#   mutate(RSA = -999,
#          RSA = ifelse(RSA_sweep > 0, RSA_sweep, RSA),
#          RSA = ifelse(RSA < 0, RSA_x, RSA)) %>% 
#   filter(RSA > 0) %>% 
#   filter(Country == "LAT") %>% 
#   distinct(Year) %>% 
#   arrange(Year)

# Plot the distribution of the combined correction factor (RS * RSA) for
# flounder. The cod data (hlcodL3) were already plotted right after hlcodL3 was
# built, so this plot uses hlfleL3.
ggplot(hlfleL3, aes(RSRSA)) + geom_histogram()


# Standardize: multiply the unstandardized CPUE (in weight, CPUEun_kg, and in
# numbers, CPUEun) by the speed and swept-area correction factors

# Cod
hlcodL3 <- mutate(
  hlcodL3,
  CPUEst_kg = CPUEun_kg * RS * RSA,
  CPUEst = CPUEun * RS * RSA
)

# Flounder
hlfleL3 <- mutate(
  hlfleL3,
  CPUEst_kg = CPUEun_kg * RS * RSA,
  CPUEst = CPUEun * RS * RSA
)
  
# Sanity checks on the standardized CPUE: no missing values and no negatives

# Cod
unique(is.na(hlcodL3[["CPUEst_kg"]]))
#> [1] FALSE
unique(is.na(hlcodL3[["CPUEst"]]))
#> [1] FALSE
min(hlcodL3[["CPUEst_kg"]])
#> [1] 0
min(hlcodL3[["CPUEst"]])
#> [1] 0

# Flounder. Any rows with a missing CPUEst_kg are dropped here.
unique(is.na(hlfleL3[["CPUEst_kg"]]))
#> [1] FALSE
hlfleL3 <- drop_na(hlfleL3, CPUEst_kg)
unique(is.na(hlfleL3[["CPUEst"]]))
#> [1] FALSE
min(hlfleL3[["CPUEst_kg"]])
#> [1] 0
min(hlfleL3[["CPUEst"]])
#> [1] 0

# Now calculate CPUE per length class, then convert the unit: from kg of fish caught when trawling for 1 h over a standard bottom swept area of 0.45 km^2 (using a TVL trawl with 75 m sweeps at the standard speed of three knots) to kg of fish per km^2, by dividing by 0.45

# Compare the two length columns for cod. length_cm is the original length
# (NA on zero-catch rows in the output further down), length_cm2 is the version
# where those rows are set to 0.
# The top panel maps length_cm so that it matches its "length_cm1" label; it
# previously mapped length_cm2, which made both panels identical.
p1 <- ggplot(hlcodL3) +
  geom_histogram(aes(length_cm, fill = "length_cm1"), alpha = 0.5)

p2 <- ggplot(hlcodL3) +
  geom_histogram(aes(length_cm2, fill = "length_cm2"), alpha = 0.5)

# Stack the panels with patchwork
p1/p2


# Cod: add shorter aliases for the standardized and unstandardized CPUE, then
# derive per-row catch and density
hlcodhaul <- mutate(
  hlcodL3,
  cpue_kg = CPUEst_kg,
  cpue = CPUEst,
  cpue_kg_un = CPUEun_kg,
  cpue_un = CPUEun,
  # Scale CPUE by HaulDur / 60 (presumably haul duration in minutes -> hours;
  # TODO confirm)
  kg = cpue_kg * (HaulDur / 60),
  abund = cpue * (HaulDur / 60),
  # Divide by the 0.45 km^2 standard swept area to express values per km^2
  density = kg / 0.45,
  density_ab = abund / 0.45
)

# t <- hlcodhaul %>% filter(haul_cpue_un == 0)
# t <- hlcodhaul %>% filter(!Country == "SWE") %>% filter(haul_cpue_un > 0)

# Flounder: add shorter aliases for the standardized and unstandardized CPUE,
# then derive per-row catch and density
hlflehaul <- mutate(
  hlfleL3,
  cpue_kg = CPUEst_kg,
  cpue = CPUEst,
  cpue_kg_un = CPUEun_kg,
  cpue_un = CPUEun,
  # Scale CPUE by HaulDur / 60 (presumably haul duration in minutes -> hours;
  # TODO confirm)
  kg = cpue_kg * (HaulDur / 60),
  abund = cpue * (HaulDur / 60),
  # Divide by the 0.45 km^2 standard swept area to express values per km^2
  density = kg / 0.45,
  density_ab = abund / 0.45
)

# First, figure out why some hauls have a row with length = 0 and density = 0
# even though other length classes were caught in the same haul. Print one
# such haul in full.
hlcodhaul %>%
  filter(haul.id == "1993:1:GFR:SOL:H20:23:31") %>%
  as.data.frame()
#>   RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType StNo
#> 1         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   23
#> 2         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   23
#> 3         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   23
#> 4         HH   BITS       1     GFR 06S1  H20        NA   <NA>       NA   23
#>   HaulNo Year SpecCodeType SpecCode   SpecVal  Sex TotalNo CatIdentifier NoMeas
#> 1     31 1993            W   126436         1 <NA>       6             1      3
#> 2     31 1993            W   126436         1 <NA>       6             1      3
#> 3     31 1993            W   126436         1 <NA>       6             1      3
#> 4     31 1993         <NA>       NA zeroCatch <NA>      NA            NA     NA
#>   SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1         1     NA          25        1         6          2     <NA>
#> 2         1     NA          25        1        35          2     <NA>
#> 3         1     NA          25        1        39          2     <NA>
#> 4        NA     NA          NA     <NA>        NA         NA     <NA>
#>   LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1          NA          20211203      126436   SOL   SOL
#> 2          NA          20211203      126436   SOL   SOL
#> 3          NA          20211203      126436   SOL   SOL
#> 4          NA          20220301          NA   SOL   SOL
#>                         IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1 1993.1.GFR.06S1.H20.23.31      24 38G4       V              1             1
#> 2 1993.1.GFR.06S1.H20.23.31      24 38G4       V              1             1
#> 3 1993.1.GFR.06S1.H20.23.31      24 38G4       V              1             1
#> 4 1993.1.GFR.06S1.H20.23.31      24 38G4       V              1             1
#>   Fishing.line Month DataType HaulDur GroundSpeed                  haul.id
#> 1           36     2        C      30           4 1993:1:GFR:SOL:H20:23:31
#> 2           36     2        C      30           4 1993:1:GFR:SOL:H20:23:31
#> 3           36     2        C      30           4 1993:1:GFR:SOL:H20:23:31
#> 4           36     2        C      30           4 1993:1:GFR:SOL:H20:23:31
#>   ShootLat ShootLong      id_haul_stomach      Species CPUEun           a
#> 1  54.5167     14.15 1993.1.2.GFR.38G4.31 Gadus morhua      2 0.007244481
#> 2  54.5167     14.15 1993.1.2.GFR.38G4.31 Gadus morhua      2 0.007244481
#> 3  54.5167     14.15 1993.1.2.GFR.38G4.31 Gadus morhua      2 0.007244481
#> 4  54.5167     14.15                 <NA> Gadus morhua      0 0.007244481
#>        b length_cm length_cm2   weight_kg   CPUEun_kg RSA_sweep RS_sweep RS_x
#> 1 3.1008         6          6 0.001874558 0.003749116      1.47     0.75 0.75
#> 2 3.1008        35         35 0.444481726 0.888963452      1.47     0.75 0.75
#> 3 3.1008        39         39 0.621699998 1.243399996      1.47     0.75 0.75
#> 4 3.1008        NA          0 0.000000000 0.000000000      1.47     0.75 0.75
#>   Horizontal.opening..m. Swep.one.side..after.formula...meter Size.final..m
#> 1                  24.12                                   NA            NA
#> 2                  24.12                                   NA            NA
#> 3                  24.12                                   NA            NA
#> 4                  24.12                                   NA            NA
#>   Swept.area      RSA_x   RS  RSA  RSRSA CPUEst_kg CPUEst   cpue_kg  cpue
#> 1         NA -0.3333333 0.75 1.47 1.1025 0.0041334  2.205 0.0041334 2.205
#> 2         NA -0.3333333 0.75 1.47 1.1025 0.9800822  2.205 0.9800822 2.205
#> 3         NA -0.3333333 0.75 1.47 1.1025 1.3708485  2.205 1.3708485 2.205
#> 4         NA -0.3333333 0.75 1.47 1.1025 0.0000000  0.000 0.0000000 0.000
#>    cpue_kg_un cpue_un        kg  abund     density density_ab
#> 1 0.003749116       2 0.0020667 1.1025 0.004592667       2.45
#> 2 0.888963452       2 0.4900411 1.1025 1.088980229       2.45
#> 3 1.243399996       2 0.6854242 1.1025 1.523164995       2.45
#> 4 0.000000000       0 0.0000000 0.0000 0.000000000       0.00

# List rows from hauls that contain a zero-CPUE row together with at least one
# other CPUE value (first 50 rows only)
hlcodhaul %>%
  group_by(haul.id) %>%
  # Number of distinct unstandardized CPUE values within the haul
  mutate(no_catches = length(unique(CPUEun))) %>%
  # Keep hauls that contain at least one zero ...
  filter(any(CPUEun == 0)) %>%
  # ... and more than one distinct CPUE value
  filter(no_catches > 1) %>%
  as.data.frame() %>%
  head(n = 50)
#>    RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType StNo
#> 1          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 2          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 3          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 4          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 5          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 6          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 7          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 8          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 9          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 10         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 11         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 12         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 13         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 14         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 15         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 16         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 17         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 18         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 19         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 20         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 21         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 22         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 23         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   22
#> 24         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   22
#> 25         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   23
#> 26         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   23
#> 27         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   23
#> 28         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   24
#> 29         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   24
#> 30         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   24
#> 31         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   24
#> 32         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   24
#> 33         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 34         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 35         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 36         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 37         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 38         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 39         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 40         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 41         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 42         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 43         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 44         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 45         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 46         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 47         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 48         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 49         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 50         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#>    HaulNo Year SpecCodeType SpecCode SpecVal  Sex TotalNo CatIdentifier NoMeas
#> 1       1 1993            W   126436       1 <NA>     110             1     55
#> 2       1 1993            W   126436       1 <NA>     110             1     55
#> 3       1 1993            W   126436       1 <NA>     110             1     55
#> 4       1 1993            W   126436       1 <NA>     110             1     55
#> 5       1 1993            W   126436       1 <NA>     110             1     55
#> 6       1 1993            W   126436       1 <NA>     110             1     55
#> 7       1 1993            W   126436       1 <NA>     110             1     55
#> 8       1 1993            W   126436       1 <NA>     110             1     55
#> 9       1 1993            W   126436       1 <NA>     110             1     55
#> 10      1 1993            W   126436       1 <NA>     110             1     55
#> 11      1 1993            W   126436       1 <NA>     110             1     55
#> 12      1 1993            W   126436       1 <NA>     110             1     55
#> 13      1 1993            W   126436       1 <NA>     110             1     55
#> 14      1 1993            W   126436       1 <NA>     110             1     55
#> 15      1 1993            W   126436       1 <NA>     110             1     55
#> 16      1 1993            W   126436       1 <NA>     110             1     55
#> 17      1 1993            W   126436       1 <NA>     110             1     55
#> 18      1 1993            W   126436       1 <NA>     110             1     55
#> 19      1 1993            W   126436       1 <NA>     110             1     55
#> 20      1 1993            W   126436       1 <NA>     110             1     55
#> 21      1 1993            W   126436       1 <NA>     110             1     55
#> 22      1 1993            W   126436       1 <NA>     110             1     55
#> 23     32 1993            W   126436       1 <NA>       4             1      2
#> 24     32 1993            W   126436       1 <NA>       4             1      2
#> 25     31 1993            W   126436       1 <NA>       6             1      3
#> 26     31 1993            W   126436       1 <NA>       6             1      3
#> 27     31 1993            W   126436       1 <NA>       6             1      3
#> 28     30 1993            W   126436       1 <NA>      10             1      5
#> 29     30 1993            W   126436       1 <NA>      10             1      5
#> 30     30 1993            W   126436       1 <NA>      10             1      5
#> 31     30 1993            W   126436       1 <NA>      10             1      5
#> 32     30 1993            W   126436       1 <NA>      10             1      5
#> 33      2 1993            W   126436       1 <NA>     998             1    153
#> 34      2 1993            W   126436       1 <NA>     998             1    153
#> 35      2 1993            W   126436       1 <NA>     998             1    153
#> 36      2 1993            W   126436       1 <NA>     998             1    153
#> 37      2 1993            W   126436       1 <NA>     998             1    153
#> 38      2 1993            W   126436       1 <NA>     998             1    153
#> 39      2 1993            W   126436       1 <NA>     998             1    153
#> 40      2 1993            W   126436       1 <NA>     998             1    153
#> 41      2 1993            W   126436       1 <NA>     998             1    153
#> 42      2 1993            W   126436       1 <NA>     998             1    153
#> 43      2 1993            W   126436       1 <NA>     998             1    153
#> 44      2 1993            W   126436       1 <NA>     998             1    153
#> 45      2 1993            W   126436       1 <NA>     998             1    153
#> 46      2 1993            W   126436       1 <NA>     998             1    153
#> 47      2 1993            W   126436       1 <NA>     998             1    153
#> 48      2 1993            W   126436       1 <NA>     998             1    153
#> 49      2 1993            W   126436       1 <NA>     998             1    153
#> 50      2 1993            W   126436       1 <NA>     998             1    153
#>    SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1          1     NA         680        1        14          4     <NA>
#> 2          1     NA         680        1        16          4     <NA>
#> 3          1     NA         680        1        18          4     <NA>
#> 4          1     NA         680        1        27          8     <NA>
#> 5          1     NA         680        1        28          8     <NA>
#> 6          1     NA         680        1        29          8     <NA>
#> 7          1     NA         680        1        58          2     <NA>
#> 8          1     NA         680        1        59          2     <NA>
#> 9          1     NA         680        1        60          2     <NA>
#> 10         1     NA         680        1        65          2     <NA>
#> 11         1     NA         680        1        37          2     <NA>
#> 12         1     NA         680        1        44          4     <NA>
#> 13         1     NA         680        1        48          2     <NA>
#> 14         1     NA         680        1        49          2     <NA>
#> 15         1     NA         680        1        50          4     <NA>
#> 16         1     NA         680        1        53          2     <NA>
#> 17         1     NA         680        1        30          8     <NA>
#> 18         1     NA         680        1        31         10     <NA>
#> 19         1     NA         680        1        32          8     <NA>
#> 20         1     NA         680        1        33         14     <NA>
#> 21         1     NA         680        1        34          6     <NA>
#> 22         1     NA         680        1        35          4     <NA>
#> 23         1     NA          41        1        42          2     <NA>
#> 24         1     NA          41        1        47          2     <NA>
#> 25         1     NA          25        1         6          2     <NA>
#> 26         1     NA          25        1        35          2     <NA>
#> 27         1     NA          25        1        39          2     <NA>
#> 28         1     NA          78        1        33          2     <NA>
#> 29         1     NA          78        1        35          2     <NA>
#> 30         1     NA          78        1        37          2     <NA>
#> 31         1     NA          78        1        44          2     <NA>
#> 32         1     NA          78        1        52          2     <NA>
#> 33         1     NA        3260        1        20          7     <NA>
#> 34         1     NA        3260        1        21         13     <NA>
#> 35         1     NA        3260        1        24         13     <NA>
#> 36         1     NA        3260        1        25         46     <NA>
#> 37         1     NA        3260        1        26         46     <NA>
#> 38         1     NA        3260        1        27        130     <NA>
#> 39         1     NA        3260        1        41          7     <NA>
#> 40         1     NA        3260        1        46         13     <NA>
#> 41         1     NA        3260        1        47          7     <NA>
#> 42         1     NA        3260        1        49          7     <NA>
#> 43         1     NA        3260        1        34         52     <NA>
#> 44         1     NA        3260        1        35         26     <NA>
#> 45         1     NA        3260        1        36         20     <NA>
#> 46         1     NA        3260        1        37          7     <NA>
#> 47         1     NA        3260        1        38         13     <NA>
#> 48         1     NA        3260        1        39          7     <NA>
#> 49         1     NA        3260        1        28        130     <NA>
#> 50         1     NA        3260        1        29        117     <NA>
#>    LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1           NA          20211203      126436   SOL   SOL
#> 2           NA          20211203      126436   SOL   SOL
#> 3           NA          20211203      126436   SOL   SOL
#> 4           NA          20211203      126436   SOL   SOL
#> 5           NA          20211203      126436   SOL   SOL
#> 6           NA          20211203      126436   SOL   SOL
#> 7           NA          20211203      126436   SOL   SOL
#> 8           NA          20211203      126436   SOL   SOL
#> 9           NA          20211203      126436   SOL   SOL
#> 10          NA          20211203      126436   SOL   SOL
#> 11          NA          20211203      126436   SOL   SOL
#> 12          NA          20211203      126436   SOL   SOL
#> 13          NA          20211203      126436   SOL   SOL
#> 14          NA          20211203      126436   SOL   SOL
#> 15          NA          20211203      126436   SOL   SOL
#> 16          NA          20211203      126436   SOL   SOL
#> 17          NA          20211203      126436   SOL   SOL
#> 18          NA          20211203      126436   SOL   SOL
#> 19          NA          20211203      126436   SOL   SOL
#> 20          NA          20211203      126436   SOL   SOL
#> 21          NA          20211203      126436   SOL   SOL
#> 22          NA          20211203      126436   SOL   SOL
#> 23          NA          20211203      126436   SOL   SOL
#> 24          NA          20211203      126436   SOL   SOL
#> 25          NA          20211203      126436   SOL   SOL
#> 26          NA          20211203      126436   SOL   SOL
#> 27          NA          20211203      126436   SOL   SOL
#> 28          NA          20211203      126436   SOL   SOL
#> 29          NA          20211203      126436   SOL   SOL
#> 30          NA          20211203      126436   SOL   SOL
#> 31          NA          20211203      126436   SOL   SOL
#> 32          NA          20211203      126436   SOL   SOL
#> 33          NA          20211203      126436   SOL   SOL
#> 34          NA          20211203      126436   SOL   SOL
#> 35          NA          20211203      126436   SOL   SOL
#> 36          NA          20211203      126436   SOL   SOL
#> 37          NA          20211203      126436   SOL   SOL
#> 38          NA          20211203      126436   SOL   SOL
#> 39          NA          20211203      126436   SOL   SOL
#> 40          NA          20211203      126436   SOL   SOL
#> 41          NA          20211203      126436   SOL   SOL
#> 42          NA          20211203      126436   SOL   SOL
#> 43          NA          20211203      126436   SOL   SOL
#> 44          NA          20211203      126436   SOL   SOL
#> 45          NA          20211203      126436   SOL   SOL
#> 46          NA          20211203      126436   SOL   SOL
#> 47          NA          20211203      126436   SOL   SOL
#> 48          NA          20211203      126436   SOL   SOL
#> 49          NA          20211203      126436   SOL   SOL
#> 50          NA          20211203      126436   SOL   SOL
#>                          IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 2   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 3   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 4   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 5   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 6   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 7   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 8   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 9   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 10  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 11  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 12  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 13  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 14  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 15  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 16  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 17  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 18  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 19  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 20  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 21  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 22  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 23 1993.1.GFR.06S1.H20.22.32      24 38G4       V              1             1
#> 24 1993.1.GFR.06S1.H20.22.32      24 38G4       V              1             1
#> 25 1993.1.GFR.06S1.H20.23.31      24 38G4       V              1             1
#> 26 1993.1.GFR.06S1.H20.23.31      24 38G4       V              1             1
#> 27 1993.1.GFR.06S1.H20.23.31      24 38G4       V              1             1
#> 28 1993.1.GFR.06S1.H20.24.30      24 37G3       V              1             1
#> 29 1993.1.GFR.06S1.H20.24.30      24 37G3       V              1             1
#> 30 1993.1.GFR.06S1.H20.24.30      24 37G3       V              1             1
#> 31 1993.1.GFR.06S1.H20.24.30      24 37G3       V              1             1
#> 32 1993.1.GFR.06S1.H20.24.30      24 37G3       V              1             1
#> 33  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 34  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 35  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 36  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 37  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 38  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 39  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 40  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 41  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 42  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 43  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 44  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 45  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 46  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 47  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 48  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 49  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 50  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#>    Fishing.line Month DataType HaulDur GroundSpeed                  haul.id
#> 1            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 2            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 3            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 4            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 5            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 6            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 7            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 8            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 9            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 10           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 11           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 12           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 13           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 14           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 15           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 16           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 17           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 18           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 19           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 20           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 21           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 22           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 23           36     2        C      30         3.6 1993:1:GFR:SOL:H20:22:32
#> 24           36     2        C      30         3.6 1993:1:GFR:SOL:H20:22:32
#> 25           36     2        C      30         4.0 1993:1:GFR:SOL:H20:23:31
#> 26           36     2        C      30         4.0 1993:1:GFR:SOL:H20:23:31
#> 27           36     2        C      30         4.0 1993:1:GFR:SOL:H20:23:31
#> 28           36     2        C      30         3.8 1993:1:GFR:SOL:H20:24:30
#> 29           36     2        C      30         3.8 1993:1:GFR:SOL:H20:24:30
#> 30           36     2        C      30         3.8 1993:1:GFR:SOL:H20:24:30
#> 31           36     2        C      30         3.8 1993:1:GFR:SOL:H20:24:30
#> 32           36     2        C      30         3.8 1993:1:GFR:SOL:H20:24:30
#> 33           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 34           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 35           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 36           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 37           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 38           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 39           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 40           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 41           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 42           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 43           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 44           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 45           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 46           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 47           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 48           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 49           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 50           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#>    ShootLat ShootLong      id_haul_stomach      Species CPUEun           a
#> 1   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      4 0.007244481
#> 2   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      4 0.007244481
#> 3   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      4 0.007244481
#> 4   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      8 0.007244481
#> 5   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      8 0.007244481
#> 6   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      8 0.007244481
#> 7   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      2 0.007244481
#> 8   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      2 0.007244481
#> 9   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      2 0.007244481
#> 10  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      2 0.007244481
#> 11  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      2 0.007244481
#> 12  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      4 0.007244481
#> 13  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      2 0.007244481
#> 14  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      2 0.007244481
#> 15  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      4 0.007244481
#> 16  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      2 0.007244481
#> 17  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      8 0.007244481
#> 18  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua     10 0.007244481
#> 19  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      8 0.007244481
#> 20  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua     14 0.007244481
#> 21  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      6 0.007244481
#> 22  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      4 0.007244481
#> 23  54.5167   14.2667 1993.1.2.GFR.38G4.32 Gadus morhua      2 0.007244481
#> 24  54.5167   14.2667 1993.1.2.GFR.38G4.32 Gadus morhua      2 0.007244481
#> 25  54.5167   14.1500 1993.1.2.GFR.38G4.31 Gadus morhua      2 0.007244481
#> 26  54.5167   14.1500 1993.1.2.GFR.38G4.31 Gadus morhua      2 0.007244481
#> 27  54.5167   14.1500 1993.1.2.GFR.38G4.31 Gadus morhua      2 0.007244481
#> 28  54.4833   13.9833 1993.1.2.GFR.37G3.30 Gadus morhua      2 0.007244481
#> 29  54.4833   13.9833 1993.1.2.GFR.37G3.30 Gadus morhua      2 0.007244481
#> 30  54.4833   13.9833 1993.1.2.GFR.37G3.30 Gadus morhua      2 0.007244481
#> 31  54.4833   13.9833 1993.1.2.GFR.37G3.30 Gadus morhua      2 0.007244481
#> 32  54.4833   13.9833 1993.1.2.GFR.37G3.30 Gadus morhua      2 0.007244481
#> 33  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua      7 0.007244481
#> 34  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     13 0.007244481
#> 35  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     13 0.007244481
#> 36  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     46 0.007244481
#> 37  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     46 0.007244481
#> 38  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua    130 0.007244481
#> 39  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua      7 0.007244481
#> 40  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     13 0.007244481
#> 41  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua      7 0.007244481
#> 42  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua      7 0.007244481
#> 43  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     52 0.007244481
#> 44  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     26 0.007244481
#> 45  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     20 0.007244481
#> 46  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua      7 0.007244481
#> 47  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     13 0.007244481
#> 48  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua      7 0.007244481
#> 49  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua    130 0.007244481
#> 50  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua    117 0.007244481
#>         b length_cm length_cm2   weight_kg    CPUEun_kg RSA_sweep RS_sweep
#> 1  3.1008        14         14 0.025937100  0.103748400      1.47     0.79
#> 2  3.1008        16         16 0.039241253  0.156965010      1.47     0.79
#> 3  3.1008        18         18 0.056540106  0.226160424      1.47     0.79
#> 4  3.1008        27         27 0.198783552  1.590268412      1.47     0.79
#> 5  3.1008        28         28 0.222512961  1.780103692      1.47     0.79
#> 6  3.1008        29         29 0.248091227  1.984729814      1.47     0.79
#> 7  3.1008        58         58 2.128361064  4.256722129      1.47     0.79
#> 8  3.1008        59         59 2.244221418  4.488442837      1.47     0.79
#> 9  3.1008        60         60 2.364281503  4.728563006      1.47     0.79
#> 10 3.1008        65         65 3.030327268  6.060654536      1.47     0.79
#> 11 3.1008        37         37 0.528065302  1.056130604      1.47     0.79
#> 12 3.1008        44         44 0.903703173  3.614812692      1.47     0.79
#> 13 3.1008        48         48 1.183588094  2.367176188      1.47     0.79
#> 14 3.1008        49         49 1.261733885  2.523467769      1.47     0.79
#> 15 3.1008        50         50 1.343302904  5.373211615      1.47     0.79
#> 16 3.1008        53         53 1.609319924  3.218639848      1.47     0.79
#> 17 3.1008        30         30 0.275591162  2.204729295      1.47     0.79
#> 18 3.1008        31         31 0.305085835  3.050858346      1.47     0.79
#> 19 3.1008        32         32 0.336648559  2.693188473      1.47     0.79
#> 20 3.1008        33         33 0.370352888  5.184940437      1.47     0.79
#> 21 3.1008        34         34 0.406272607  2.437635642      1.47     0.79
#> 22 3.1008        35         35 0.444481726  1.777926905      1.47     0.79
#> 23 3.1008        42         42 0.782310468  1.564620936      1.47     0.83
#> 24 3.1008        47         47 1.108788707  2.217577415      1.47     0.83
#> 25 3.1008         6          6 0.001874558  0.003749116      1.47     0.75
#> 26 3.1008        35         35 0.444481726  0.888963452      1.47     0.75
#> 27 3.1008        39         39 0.621699998  1.243399996      1.47     0.75
#> 28 3.1008        33         33 0.370352888  0.740705777      1.47     0.79
#> 29 3.1008        35         35 0.444481726  0.888963452      1.47     0.79
#> 30 3.1008        37         37 0.528065302  1.056130604      1.47     0.79
#> 31 3.1008        44         44 0.903703173  1.807406346      1.47     0.79
#> 32 3.1008        52         52 1.517018710  3.034037419      1.47     0.79
#> 33 3.1008        20         20 0.078386533  0.548705733      1.47     0.83
#> 34 3.1008        21         21 0.091189586  1.185464615      1.47     0.83
#> 35 3.1008        24         24 0.137964290  1.793535765      1.47     0.83
#> 36 3.1008        25         25 0.156581358  7.202742459      1.47     0.83
#> 37 3.1008        26         26 0.176830444  8.134200440      1.47     0.83
#> 38 3.1008        27         27 0.198783552 25.841861699      1.47     0.83
#> 39 3.1008        41         41 0.725985458  5.081898208      1.47     0.83
#> 40 3.1008        46         46 1.037259063 13.484367814      1.47     0.83
#> 41 3.1008        47         47 1.108788707  7.761520951      1.47     0.83
#> 42 3.1008        49         49 1.261733885  8.832137193      1.47     0.83
#> 43 3.1008        34         34 0.406272607 21.126175567      1.47     0.83
#> 44 3.1008        35         35 0.444481726 11.556524879      1.47     0.83
#> 45 3.1008        36         36 0.485054476  9.701089523      1.47     0.83
#> 46 3.1008        37         37 0.528065302  3.696457113      1.47     0.83
#> 47 3.1008        38         38 0.573588857  7.456655137      1.47     0.83
#> 48 3.1008        39         39 0.621699998  4.351899986      1.47     0.83
#> 49 3.1008        28         28 0.222512961 28.926684992      1.47     0.83
#> 50 3.1008        29         29 0.248091227 29.026673534      1.47     0.83
#>         RS_x Horizontal.opening..m. Swep.one.side..after.formula...meter
#> 1  0.7894737                  24.12                                   NA
#> 2  0.7894737                  24.12                                   NA
#> 3  0.7894737                  24.12                                   NA
#> 4  0.7894737                  24.12                                   NA
#> 5  0.7894737                  24.12                                   NA
#> 6  0.7894737                  24.12                                   NA
#> 7  0.7894737                  24.12                                   NA
#> 8  0.7894737                  24.12                                   NA
#> 9  0.7894737                  24.12                                   NA
#> 10 0.7894737                  24.12                                   NA
#> 11 0.7894737                  24.12                                   NA
#> 12 0.7894737                  24.12                                   NA
#> 13 0.7894737                  24.12                                   NA
#> 14 0.7894737                  24.12                                   NA
#> 15 0.7894737                  24.12                                   NA
#> 16 0.7894737                  24.12                                   NA
#> 17 0.7894737                  24.12                                   NA
#> 18 0.7894737                  24.12                                   NA
#> 19 0.7894737                  24.12                                   NA
#> 20 0.7894737                  24.12                                   NA
#> 21 0.7894737                  24.12                                   NA
#> 22 0.7894737                  24.12                                   NA
#> 23 0.8333333                  24.12                                   NA
#> 24 0.8333333                  24.12                                   NA
#> 25 0.7500000                  24.12                                   NA
#> 26 0.7500000                  24.12                                   NA
#> 27 0.7500000                  24.12                                   NA
#> 28 0.7894737                  24.12                                   NA
#> 29 0.7894737                  24.12                                   NA
#> 30 0.7894737                  24.12                                   NA
#> 31 0.7894737                  24.12                                   NA
#> 32 0.7894737                  24.12                                   NA
#> 33 0.8333333                  24.12                                   NA
#> 34 0.8333333                  24.12                                   NA
#> 35 0.8333333                  24.12                                   NA
#> 36 0.8333333                  24.12                                   NA
#> 37 0.8333333                  24.12                                   NA
#> 38 0.8333333                  24.12                                   NA
#> 39 0.8333333                  24.12                                   NA
#> 40 0.8333333                  24.12                                   NA
#> 41 0.8333333                  24.12                                   NA
#> 42 0.8333333                  24.12                                   NA
#> 43 0.8333333                  24.12                                   NA
#> 44 0.8333333                  24.12                                   NA
#> 45 0.8333333                  24.12                                   NA
#> 46 0.8333333                  24.12                                   NA
#> 47 0.8333333                  24.12                                   NA
#> 48 0.8333333                  24.12                                   NA
#> 49 0.8333333                  24.12                                   NA
#> 50 0.8333333                  24.12                                   NA
#>    Size.final..m Swept.area      RSA_x   RS  RSA  RSRSA  CPUEst_kg   CPUEst
#> 1             NA         NA -0.3333333 0.79 1.47 1.1613  0.1204830   4.6452
#> 2             NA         NA -0.3333333 0.79 1.47 1.1613  0.1822835   4.6452
#> 3             NA         NA -0.3333333 0.79 1.47 1.1613  0.2626401   4.6452
#> 4             NA         NA -0.3333333 0.79 1.47 1.1613  1.8467787   9.2904
#> 5             NA         NA -0.3333333 0.79 1.47 1.1613  2.0672344   9.2904
#> 6             NA         NA -0.3333333 0.79 1.47 1.1613  2.3048667   9.2904
#> 7             NA         NA -0.3333333 0.79 1.47 1.1613  4.9433314   2.3226
#> 8             NA         NA -0.3333333 0.79 1.47 1.1613  5.2124287   2.3226
#> 9             NA         NA -0.3333333 0.79 1.47 1.1613  5.4912802   2.3226
#> 10            NA         NA -0.3333333 0.79 1.47 1.1613  7.0382381   2.3226
#> 11            NA         NA -0.3333333 0.79 1.47 1.1613  1.2264845   2.3226
#> 12            NA         NA -0.3333333 0.79 1.47 1.1613  4.1978820   4.6452
#> 13            NA         NA -0.3333333 0.79 1.47 1.1613  2.7490017   2.3226
#> 14            NA         NA -0.3333333 0.79 1.47 1.1613  2.9305031   2.3226
#> 15            NA         NA -0.3333333 0.79 1.47 1.1613  6.2399106   4.6452
#> 16            NA         NA -0.3333333 0.79 1.47 1.1613  3.7378065   2.3226
#> 17            NA         NA -0.3333333 0.79 1.47 1.1613  2.5603521   9.2904
#> 18            NA         NA -0.3333333 0.79 1.47 1.1613  3.5429618  11.6130
#> 19            NA         NA -0.3333333 0.79 1.47 1.1613  3.1275998   9.2904
#> 20            NA         NA -0.3333333 0.79 1.47 1.1613  6.0212713  16.2582
#> 21            NA         NA -0.3333333 0.79 1.47 1.1613  2.8308263   6.9678
#> 22            NA         NA -0.3333333 0.79 1.47 1.1613  2.0647065   4.6452
#> 23            NA         NA -0.3333333 0.83 1.47 1.2201  1.9089940   2.4402
#> 24            NA         NA -0.3333333 0.83 1.47 1.2201  2.7056662   2.4402
#> 25            NA         NA -0.3333333 0.75 1.47 1.1025  0.0041334   2.2050
#> 26            NA         NA -0.3333333 0.75 1.47 1.1025  0.9800822   2.2050
#> 27            NA         NA -0.3333333 0.75 1.47 1.1025  1.3708485   2.2050
#> 28            NA         NA -0.3333333 0.79 1.47 1.1613  0.8601816   2.3226
#> 29            NA         NA -0.3333333 0.79 1.47 1.1613  1.0323533   2.3226
#> 30            NA         NA -0.3333333 0.79 1.47 1.1613  1.2264845   2.3226
#> 31            NA         NA -0.3333333 0.79 1.47 1.1613  2.0989410   2.3226
#> 32            NA         NA -0.3333333 0.79 1.47 1.1613  3.5234277   2.3226
#> 33            NA         NA -0.3333333 0.83 1.47 1.2201  0.6694759   8.5407
#> 34            NA         NA -0.3333333 0.83 1.47 1.2201  1.4463854  15.8613
#> 35            NA         NA -0.3333333 0.83 1.47 1.2201  2.1882930  15.8613
#> 36            NA         NA -0.3333333 0.83 1.47 1.2201  8.7880661  56.1246
#> 37            NA         NA -0.3333333 0.83 1.47 1.2201  9.9245380  56.1246
#> 38            NA         NA -0.3333333 0.83 1.47 1.2201 31.5296555 158.6130
#> 39            NA         NA -0.3333333 0.83 1.47 1.2201  6.2004240   8.5407
#> 40            NA         NA -0.3333333 0.83 1.47 1.2201 16.4522772  15.8613
#> 41            NA         NA -0.3333333 0.83 1.47 1.2201  9.4698317   8.5407
#> 42            NA         NA -0.3333333 0.83 1.47 1.2201 10.7760906   8.5407
#> 43            NA         NA -0.3333333 0.83 1.47 1.2201 25.7760468  63.4452
#> 44            NA         NA -0.3333333 0.83 1.47 1.2201 14.1001160  31.7226
#> 45            NA         NA -0.3333333 0.83 1.47 1.2201 11.8362993  24.4020
#> 46            NA         NA -0.3333333 0.83 1.47 1.2201  4.5100473   8.5407
#> 47            NA         NA -0.3333333 0.83 1.47 1.2201  9.0978649  15.8613
#> 48            NA         NA -0.3333333 0.83 1.47 1.2201  5.3097532   8.5407
#> 49            NA         NA -0.3333333 0.83 1.47 1.2201 35.2934484 158.6130
#> 50            NA         NA -0.3333333 0.83 1.47 1.2201 35.4154444 142.7517
#>       cpue_kg     cpue   cpue_kg_un cpue_un          kg    abund      density
#> 1   0.1204830   4.6452  0.103748400       4  0.06024151  2.32260  0.133870018
#> 2   0.1822835   4.6452  0.156965010       4  0.09114173  2.32260  0.202537185
#> 3   0.2626401   4.6452  0.226160424       4  0.13132005  2.32260  0.291822333
#> 4   1.8467787   9.2904  1.590268412       8  0.92338935  4.64520  2.051976341
#> 5   2.0672344   9.2904  1.780103692       8  1.03361721  4.64520  2.296927130
#> 6   2.3048667   9.2904  1.984729814       8  1.15243337  4.64520  2.560963037
#> 7   4.9433314   2.3226  4.256722129       2  2.47166570  1.16130  5.492590453
#> 8   5.2124287   2.3226  4.488442837       2  2.60621433  1.16130  5.791587407
#> 9   5.4912802   2.3226  4.728563006       2  2.74564011  1.16130  6.101422465
#> 10  7.0382381   2.3226  6.060654536       2  3.51911906  1.16130  7.820264570
#> 11  1.2264845   2.3226  1.056130604       2  0.61324224  1.16130  1.362760522
#> 12  4.1978820   4.6452  3.614812692       4  2.09894099  2.32260  4.664313310
#> 13  2.7490017   2.3226  2.367176188       2  1.37450085  1.16130  3.054446341
#> 14  2.9305031   2.3226  2.523467769       2  1.46525156  1.16130  3.256114579
#> 15  6.2399106   4.6452  5.373211615       4  3.11995532  2.32260  6.933234054
#> 16  3.7378065   2.3226  3.218639848       2  1.86890323  1.16130  4.153118283
#> 17  2.5603521   9.2904  2.204729295       8  1.28017607  4.64520  2.844835700
#> 18  3.5429618  11.6130  3.050858346      10  1.77148090  5.80650  3.936624220
#> 19  3.1275998   9.2904  2.693188473       8  1.56379989  4.64520  3.475110860
#> 20  6.0212713  16.2582  5.184940437      14  3.01063566  8.12910  6.690301477
#> 21  2.8308263   6.9678  2.437635642       6  1.41541314  3.48390  3.145362524
#> 22  2.0647065   4.6452  1.777926905       4  1.03235326  2.32260  2.294118349
#> 23  1.9089940   2.4402  1.564620936       2  0.95449700  1.22010  2.121104449
#> 24  2.7056662   2.4402  2.217577415       2  1.35283310  1.22010  3.006295782
#> 25  0.0041334   2.2050  0.003749116       2  0.00206670  1.10250  0.004592667
#> 26  0.9800822   2.2050  0.888963452       2  0.49004110  1.10250  1.088980229
#> 27  1.3708485   2.2050  1.243399996       2  0.68542425  1.10250  1.523164995
#> 28  0.8601816   2.3226  0.740705777       2  0.43009081  1.16130  0.955757354
#> 29  1.0323533   2.3226  0.888963452       2  0.51617663  1.16130  1.147059175
#> 30  1.2264845   2.3226  1.056130604       2  0.61324224  1.16130  1.362760522
#> 31  2.0989410   2.3226  1.807406346       2  1.04947049  1.16130  2.332156655
#> 32  3.5234277   2.3226  3.034037419       2  1.76171383  1.16130  3.914919617
#> 33  0.6694759   8.5407  0.548705733       7  0.33473793  4.27035  0.743862072
#> 34  1.4463854  15.8613  1.185464615      13  0.72319269  7.93065  1.607094863
#> 35  2.1882930  15.8613  1.793535765      13  1.09414649  7.93065  2.431436652
#> 36  8.7880661  56.1246  7.202742459      46  4.39403304 28.06230  9.764517861
#> 37  9.9245380  56.1246  8.134200440      46  4.96226898 28.06230 11.027264397
#> 38 31.5296555 158.6130 25.841861699     130 15.76482773 79.30650 35.032950510
#> 39  6.2004240   8.5407  5.081898208       7  3.10021200  4.27035  6.889360005
#> 40 16.4522772  15.8613 13.484367814      13  8.22613858  7.93065 18.280307966
#> 41  9.4698317   8.5407  7.761520951       7  4.73491586  4.27035 10.522035236
#> 42 10.7760906   8.5407  8.832137193       7  5.38804529  4.27035 11.973433988
#> 43 25.7760468  63.4452 21.126175567      52 12.88802340 31.72260 28.640052010
#> 44 14.1001160  31.7226 11.556524879      26  7.05005800 15.86130 15.666795562
#> 45 11.8362993  24.4020  9.701089523      20  5.91814966 12.20100 13.151443697
#> 46  4.5100473   8.5407  3.696457113       7  2.25502366  4.27035  5.011163693
#> 47  9.0978649  15.8613  7.456655137      13  4.54893247  7.93065 10.108738814
#> 48  5.3097532   8.5407  4.351899986       7  2.65487659  4.27035  5.899725747
#> 49 35.2934484 158.6130 28.926684992     130 17.64672418 79.30650 39.214942620
#> 50 35.4154444 142.7517 29.026673534     117 17.70772219 71.37585 39.350493755
#>    density_ab no_catches
#> 1    5.161333          7
#> 2    5.161333          7
#> 3    5.161333          7
#> 4   10.322667          7
#> 5   10.322667          7
#> 6   10.322667          7
#> 7    2.580667          7
#> 8    2.580667          7
#> 9    2.580667          7
#> 10   2.580667          7
#> 11   2.580667          7
#> 12   5.161333          7
#> 13   2.580667          7
#> 14   2.580667          7
#> 15   5.161333          7
#> 16   2.580667          7
#> 17  10.322667          7
#> 18  12.903333          7
#> 19  10.322667          7
#> 20  18.064667          7
#> 21   7.742000          7
#> 22   5.161333          7
#> 23   2.711333          2
#> 24   2.711333          2
#> 25   2.450000          2
#> 26   2.450000          2
#> 27   2.450000          2
#> 28   2.580667          2
#> 29   2.580667          2
#> 30   2.580667          2
#> 31   2.580667          2
#> 32   2.580667          2
#> 33   9.489667         12
#> 34  17.623667         12
#> 35  17.623667         12
#> 36  62.360667         12
#> 37  62.360667         12
#> 38 176.236667         12
#> 39   9.489667         12
#> 40  17.623667         12
#> 41   9.489667         12
#> 42   9.489667         12
#> 43  70.494667         12
#> 44  35.247333         12
#> 45  27.113333         12
#> 46   9.489667         12
#> 47  17.623667         12
#> 48   9.489667         12
#> 49 176.236667         12
#> 50 158.613000         12

# Within each haul keep the rows where CPUEun equals the haul's minimum, then
# list the distinct values those per-haul minima take
distinct(
  ungroup(
    filter(group_by(hlcodhaul, haul.id), CPUEun == min(CPUEun))
  ),
  CPUEun
)
#> # A tibble: 1 × 1
#>   CPUEun
#>    <dbl>
#> 1      0

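# The minimum CPUE within every haul is zero at this stage. This does not really matter, because haul-level CPUE is calculated by grouping by haul ID and summing. Below I only look at the haul totals with the all-zero hauls filtered out; the result is not assigned, so nothing is removed from hlcodhaul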

# Haul totals: sum `cpue` over all rows of a haul and show only the hauls
# whose total is non-zero. The result is printed, not stored.
hlcodhaul %>%
  group_by(haul.id) %>%
  summarise(cpue_haul = sum(cpue)) %>%
  ungroup() %>%
  filter(cpue_haul != 0)
#> # A tibble: 8,239 × 2
#>    haul.id                  cpue_haul
#>    <chr>                        <dbl>
#>  1 1993:1:GFR:SOL:H20:21:1     128.  
#>  2 1993:1:GFR:SOL:H20:22:32      4.88
#>  3 1993:1:GFR:SOL:H20:23:31      6.62
#>  4 1993:1:GFR:SOL:H20:24:30     11.6 
#>  5 1993:1:GFR:SOL:H20:25:2    1220.  
#>  6 1993:1:GFR:SOL:H20:26:3     478.  
#>  7 1993:1:GFR:SOL:H20:27:27     31.7 
#>  8 1993:1:GFR:SOL:H20:28:24   3548.  
#>  9 1993:1:GFR:SOL:H20:29:29     22.0 
#> 10 1993:1:GFR:SOL:H20:30:28      2.44
#> # … with 8,229 more rows

# Cod: keep density together with the haul, position and timing columns,
# give them shorter lower-case names and tag the rows with the species.
# Note that it is `length_cm2` that is carried forward as `length_cm`.
datcod <- hlcodhaul %>%
  dplyr::select(
    density, Year, ShootLat, ShootLong, Quarter, Country, Month,
    haul.id, IDx, Rect, sub_div, length_cm2, id_haul_stomach
  ) %>%
  rename(
    year = Year,
    lat = ShootLat,
    lon = ShootLong,
    quarter = Quarter,
    ices_rect = Rect,
    length_cm = length_cm2
  ) %>%
  mutate(species = "cod")

# Flounder: same column selection and renaming as for cod, applied to the
# flounder haul data, with the rows tagged as flounder.
# Note that it is `length_cm2` that is carried forward as `length_cm`.
datfle <- hlflehaul %>%
  dplyr::select(
    density, Year, ShootLat, ShootLong, Quarter, Country, Month,
    haul.id, IDx, Rect, sub_div, length_cm2, id_haul_stomach
  ) %>%
  rename(
    year = Year,
    lat = ShootLat,
    lon = ShootLong,
    quarter = Quarter,
    ices_rect = Rect,
    length_cm = length_cm2
  ) %>%
  mutate(species = "flounder")
# Because this is size-based CPUE, I want the data frame to be "full", i.e. every haul should have a row for every length, even when nothing was caught at that length. At the moment a haul only has rows for the lengths that were caught, and none of the other lengths.
# Number of distinct lengths recorded in each haul; keep the first haul found
# for every count that occurs
datcod %>%
  group_by(haul.id) %>%
  summarise(n_size = n_distinct(length_cm)) %>%
  distinct(n_size, .keep_all = TRUE)
#> group_by: one grouping variable (haul.id)
#> summarise: now 9,373 rows and 2 columns, ungrouped
#> distinct: removed 9,304 rows (99%), 69 rows remaining
#> # A tibble: 69 × 2
#>    haul.id                  n_size
#>    <chr>                     <int>
#>  1 1993:1:GFR:SOL:H20:21:1      23
#>  2 1993:1:GFR:SOL:H20:22:32      3
#>  3 1993:1:GFR:SOL:H20:23:31      4
#>  4 1993:1:GFR:SOL:H20:24:30      6
#>  5 1993:1:GFR:SOL:H20:26:3      31
#>  6 1993:1:GFR:SOL:H20:27:27     10
#>  7 1993:1:GFR:SOL:H20:28:24     36
#>  8 1993:1:GFR:SOL:H20:29:29      7
#>  9 1993:1:GFR:SOL:H20:30:28      2
#> 10 1993:1:GFR:SOL:H20:31:25     29
#> # … with 59 more rows
# Look at all rows of one example haul, converted to a base data frame
as.data.frame(
  filter(datcod, haul.id == "1993:1:GFR:SOL:H20:23:31")
)
#> filter: removed 219,786 rows (>99%), 4 rows remaining
#>       density year     lat   lon quarter Country Month                  haul.id
#> 1 0.004592667 1993 54.5167 14.15       1     GFR     2 1993:1:GFR:SOL:H20:23:31
#> 2 1.088980229 1993 54.5167 14.15       1     GFR     2 1993:1:GFR:SOL:H20:23:31
#> 3 1.523164995 1993 54.5167 14.15       1     GFR     2 1993:1:GFR:SOL:H20:23:31
#> 4 0.000000000 1993 54.5167 14.15       1     GFR     2 1993:1:GFR:SOL:H20:23:31
#>                         IDx ices_rect sub_div length_cm      id_haul_stomach
#> 1 1993.1.GFR.06S1.H20.23.31      38G4      24         6 1993.1.2.GFR.38G4.31
#> 2 1993.1.GFR.06S1.H20.23.31      38G4      24        35 1993.1.2.GFR.38G4.31
#> 3 1993.1.GFR.06S1.H20.23.31      38G4      24        39 1993.1.2.GFR.38G4.31
#> 4 1993.1.GFR.06S1.H20.23.31      38G4      24         0                 <NA>
#>   species
#> 1     cod
#> 2     cod
#> 3     cod
#> 4     cod
# Hauls whose density sums to zero: add the haul total to every row, reduce to
# one row per haul and keep the hauls with a total of exactly zero
datcod %>%
  group_by(haul.id) %>%
  mutate(tot_dens = sum(density)) %>%
  ungroup() %>%
  distinct(haul.id, .keep_all = TRUE) %>%
  filter(tot_dens == 0)
#> group_by: one grouping variable (haul.id)
#> mutate (grouped): new variable 'tot_dens' (double) with 8,227 unique values and 0% NA
#> ungroup: no grouping variables
#> distinct: removed 210,417 rows (96%), 9,373 rows remaining
#> filter: removed 8,239 rows (88%), 1,134 rows remaining
#> # A tibble: 1,134 × 15
#>    density  year   lat   lon quarter Country Month haul.id IDx   ices_…¹ sub_div
#>      <dbl> <int> <dbl> <dbl>   <int> <chr>   <int> <chr>   <chr> <chr>   <chr>  
#>  1       0  1993  55    17.5       1 GFR         2 1993:1… 1993… 39G7    25     
#>  2       0  1993  57.9  19.4       1 SWE         3 1993:1… 1993… 44G9    28     
#>  3       0  1993  57.1  17.9       3 SWE         8 1993:3… 1993… 43G7    27     
#>  4       0  1993  57.1  18.9       3 SWE         8 1993:3… 1993… 43G8    28     
#>  5       0  1993  57.1  18.8       3 SWE         8 1993:3… 1993… 43G8    28     
#>  6       0  1993  57.2  18.9       3 SWE         8 1993:3… 1993… 43G8    28     
#>  7       0  1993  57.3  17.9       3 SWE         8 1993:3… 1993… 43G7    27     
#>  8       0  1993  57.4  16.9       3 SWE         8 1993:3… 1993… 43G6    27     
#>  9       0  1993  57.4  19.2       3 SWE         8 1993:3… 1993… 43G9    28     
#> 10       0  1993  57.5  17.1       3 SWE         8 1993:3… 1993… 43G7    27     
#> # … with 1,124 more rows, 4 more variables: length_cm <dbl>,
#> #   id_haul_stomach <chr>, species <chr>, tot_dens <dbl>, and abbreviated
#> #   variable name ¹​ices_rect

# Create a data frame with all combinations of trawl IDs and lengths, where
# the lengths run from the smallest to the largest length in datcod in steps
# of 1.
# expand.grid() already returns a data frame, so it is not wrapped in
# data.frame(). stringsAsFactors = FALSE keeps haul.id as character; the
# default would turn it into a factor, which is then joined against the
# character haul.id coming from datcod. KEEP.OUT.ATTRS = FALSE leaves out the
# "out.attrs" attribute, which is not used here.
ex_df <- expand.grid(
  length_cm = seq_range(datcod$length_cm, by = 1),
  haul.id = unique(datcod$haul.id),
  KEEP.OUT.ATTRS = FALSE,
  stringsAsFactors = FALSE
)

# Build a haul-by-length key of the form "<haul.id>.<length_cm>" in both the
# expanded grid and the observed data
ex_df[["haul.id.size"]] <- paste(
  ex_df[["haul.id"]], ex_df[["length_cm"]],
  sep = "."
)
datcod[["haul.id.size"]] <- paste(
  datcod[["haul.id"]], datcod[["length_cm"]],
  sep = "."
)

# Keep only the haul-by-length combinations that datcod does not already hold
ex_df <- filter(ex_df, !(haul.id.size %in% datcod[["haul.id.size"]]))
#> filter: removed 219,790 rows (18%), 979,954 rows remaining

# Look-up table with the remaining columns (everything except density, length
# and the haul-by-length key): one row per haul, taken from the first row of
# that haul in datcod
dat_for_join <- datcod %>%
  dplyr::select(-c(density, length_cm, haul.id.size)) %>%
  distinct(haul.id, .keep_all = TRUE)
#> distinct: removed 210,417 rows (96%), 9,373 rows remaining

ex_df <- left_join(ex_df, dat_for_join, by = "haul.id")
#> left_join: added 11 columns (year, lat, lon, quarter, Country, …)
#>            > rows only in x         0
#>            > rows only in y  (      0)
#>            > matched rows     979,954
#>            >                 =========
#>            > rows total       979,954

# Sanity check: none of the added haul-length IDs may already exist in datcod
# (the filter is expected to return zero rows)
filter(datcod, haul.id.size %in% ex_df$haul.id.size)
#> filter: removed all rows (100%)
#> # A tibble: 0 × 15
#> # … with 15 variables: density <dbl>, year <int>, lat <dbl>, lon <dbl>,
#> #   quarter <int>, Country <chr>, Month <int>, haul.id <chr>, IDx <chr>,
#> #   ices_rect <chr>, sub_div <chr>, length_cm <dbl>, id_haul_stomach <chr>,
#> #   species <chr>, haul.id.size <chr>

# Expected row count after stacking the observed rows and the added rows
sum(nrow(datcod), nrow(ex_df))
#> [1] 1199744

# Density has no NA before the bind, so any NA afterwards comes from ex_df
unique(is.na(datcod$density))
#> [1] FALSE

# Stack the two and sort by haul and length
datcod <- arrange(bind_rows(datcod, ex_df), haul.id, length_cm)
nrow(datcod)
#> [1] 1199744
datcod
#> # A tibble: 1,199,744 × 15
#>    density  year   lat   lon quarter Country Month haul.id IDx   ices_…¹ sub_div
#>      <dbl> <int> <dbl> <dbl>   <int> <chr>   <int> <chr>   <chr> <chr>   <chr>  
#>  1       0  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  2      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  3      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  4      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  5      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  6      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  7      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  8      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  9      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#> 10      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#> # … with 1,199,734 more rows, 4 more variables: length_cm <dbl>,
#> #   id_haul_stomach <chr>, species <chr>, haul.id.size <chr>, and abbreviated
#> #   variable name ¹​ices_rect

# The NA densities belong to the added length classes that were not in the
# catch data, i.e. they are true zeroes
datcod <- mutate(datcod, density = replace_na(density, 0))
#> mutate: changed 979,954 values (82%) of 'density' (979954 fewer NA)

# Check the proportion zeroes are still correct: total density per haul.
# NOTE(review): the object name `t` shadows base::t(); calls to t() still
# resolve to the function, but a more descriptive name would be safer.
t <- datcod %>%
  group_by(haul.id) %>%
  summarise(haul_density = sum(density)) %>%
  ungroup()
#> group_by: one grouping variable (haul.id)
#> summarise: now 9,373 rows and 2 columns, ungrouped
#> ungroup: no grouping variables

# Row count, number of hauls, and rows in the per-haul summary
nrow(datcod)
#> [1] 1199744
n_distinct(datcod$haul.id)
#> [1] 9373
nrow(t)
#> [1] 9373
# Hauls with a non-zero total cod density
t %>% filter(haul_density != 0)
#> filter: removed 1,134 rows (12%), 8,239 rows remaining
#> # A tibble: 8,239 × 2
#>    haul.id                  haul_density
#>    <chr>                           <dbl>
#>  1 1993:1:GFR:SOL:H20:21:1         78.6 
#>  2 1993:1:GFR:SOL:H20:22:32         5.13
#>  3 1993:1:GFR:SOL:H20:23:31         2.62
#>  4 1993:1:GFR:SOL:H20:24:30         9.71
#>  5 1993:1:GFR:SOL:H20:25:2        405.  
#>  6 1993:1:GFR:SOL:H20:26:3        181.  
#>  7 1993:1:GFR:SOL:H20:27:27        11.6 
#>  8 1993:1:GFR:SOL:H20:28:24      1151.  
#>  9 1993:1:GFR:SOL:H20:29:29        15.4 
#> 10 1993:1:GFR:SOL:H20:30:28         3.42
#> # … with 8,229 more rows

# Now do flounder
# Build the full haul x length grid: every 1-cm step across the observed length
# range crossed with every haul ID.
# stringsAsFactors = FALSE keeps haul.id as character. The expand.grid() default
# turns it into a factor, which makes the later join on haul.id depend on an
# implicit factor-to-character coercion.
ex_df <- expand.grid(
  length_cm = seq_range(datfle$length_cm, by = 1),
  haul.id = unique(datfle$haul.id),
  KEEP.OUT.ATTRS = FALSE,
  stringsAsFactors = FALSE
)

# Create an ID that is haul + length, in both the grid and the catch data
ex_df$haul.id.size <- paste(ex_df$haul.id, ex_df$length_cm, sep = ".")
datfle$haul.id.size <- paste(datfle$haul.id, datfle$length_cm, sep = ".")

# Keep only the haul-length combinations that are NOT already in datfle
# (%in% does not need the lookup values to be unique)
ex_df <- ex_df %>% filter(!haul.id.size %in% datfle$haul.id.size)
#> filter: removed 111,868 rows (20%), 446,343 rows remaining

# Haul-level lookup table: drop the per-length columns and keep the first row
# of every haul so that each haul.id occurs exactly once
dat_for_join <- datfle %>%
  dplyr::select(-c(density, length_cm, haul.id.size)) %>%
  distinct(haul.id, .keep_all = TRUE)
#> distinct: removed 104,371 rows (92%), 9,151 rows remaining

# Attach the haul-level columns to the added length classes
ex_df <- ex_df %>% left_join(dat_for_join, by = "haul.id")
#> left_join: added 11 columns (year, lat, lon, quarter, Country, …)
#>            > rows only in x         0
#>            > rows only in y  (      0)
#>            > matched rows     446,343
#>            >                 =========
#>            > rows total       446,343

# Sanity check: none of the added haul-length IDs may already exist in datfle
# (the filter is expected to return zero rows)
filter(datfle, haul.id.size %in% ex_df$haul.id.size)
#> filter: removed all rows (100%)
#> # A tibble: 0 × 15
#> # … with 15 variables: density <dbl>, year <int>, lat <dbl>, lon <dbl>,
#> #   quarter <int>, Country <chr>, Month <int>, haul.id <chr>, IDx <chr>,
#> #   ices_rect <chr>, sub_div <chr>, length_cm <dbl>, id_haul_stomach <chr>,
#> #   species <chr>, haul.id.size <chr>

# Expected row count after stacking the observed rows and the added rows
sum(nrow(datfle), nrow(ex_df))
#> [1] 559865

# Density has no NA before the bind, so any NA afterwards comes from ex_df
unique(is.na(datfle$density))
#> [1] FALSE

# Stack the two and sort by haul and length
datfle <- arrange(bind_rows(datfle, ex_df), haul.id, length_cm)
nrow(datfle)
#> [1] 559865
datfle
#> # A tibble: 559,865 × 15
#>    density  year   lat   lon quarter Country Month haul.id IDx   ices_…¹ sub_div
#>      <dbl> <int> <dbl> <dbl>   <int> <chr>   <int> <chr>   <chr> <chr>   <chr>  
#>  1       0  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  2      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  3      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  4      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  5      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  6      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  7      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  8      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  9      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#> 10      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#> # … with 559,855 more rows, 4 more variables: length_cm <dbl>,
#> #   id_haul_stomach <chr>, species <chr>, haul.id.size <chr>, and abbreviated
#> #   variable name ¹​ices_rect

# The NA densities belong to the added length classes that were not in the
# catch data, i.e. they are true zeroes
datfle <- mutate(datfle, density = replace_na(density, 0))
#> mutate: changed 446,343 values (80%) of 'density' (446343 fewer NA)

# Merge cod and flounder data! (cod rows first, then flounder)
dat <- bind_rows(list(datcod, datfle))

dat %>% glimpse()
#> Rows: 1,759,609
#> Columns: 15
#> $ density         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000,…
#> $ year            <int> 1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993, …
#> $ lat             <dbl> 54.6833, 54.6833, 54.6833, 54.6833, 54.6833, 54.6833, …
#> $ lon             <dbl> 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13…
#> $ quarter         <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
#> $ Country         <chr> "GFR", "GFR", "GFR", "GFR", "GFR", "GFR", "GFR", "GFR"…
#> $ Month           <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
#> $ haul.id         <chr> "1993:1:GFR:SOL:H20:21:1", "1993:1:GFR:SOL:H20:21:1", …
#> $ IDx             <chr> "1993.1.GFR.06S1.H20.21.1", "1993.1.GFR.06S1.H20.21.1"…
#> $ ices_rect       <chr> "38G3", "38G3", "38G3", "38G3", "38G3", "38G3", "38G3"…
#> $ sub_div         <chr> "24", "24", "24", "24", "24", "24", "24", "24", "24", …
#> $ length_cm       <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
#> $ id_haul_stomach <chr> NA, "1993.1.2.GFR.38G3.1", "1993.1.2.GFR.38G3.1", "199…
#> $ species         <chr> "cod", "cod", "cod", "cod", "cod", "cod", "cod", "cod"…
#> $ haul.id.size    <chr> "1993:1:GFR:SOL:H20:21:1.0", "1993:1:GFR:SOL:H20:21:1.…

# Check proportion zeroes: after the merge, the cod hauls with a non-zero total
# density should be the same ones as before the merge
dat %>%
  filter(species == "cod") %>%
  group_by(haul.id) %>%
  summarise(haul_dens = sum(density)) %>%
  ungroup() %>%
  filter(haul_dens != 0)
#> filter: removed 559,865 rows (32%), 1,199,744 rows remaining
#> group_by: one grouping variable (haul.id)
#> summarise: now 9,373 rows and 2 columns, ungrouped
#> ungroup: no grouping variables
#> filter: removed 1,134 rows (12%), 8,239 rows remaining
#> # A tibble: 8,239 × 2
#>    haul.id                  haul_dens
#>    <chr>                        <dbl>
#>  1 1993:1:GFR:SOL:H20:21:1      78.6 
#>  2 1993:1:GFR:SOL:H20:22:32      5.13
#>  3 1993:1:GFR:SOL:H20:23:31      2.62
#>  4 1993:1:GFR:SOL:H20:24:30      9.71
#>  5 1993:1:GFR:SOL:H20:25:2     405.  
#>  6 1993:1:GFR:SOL:H20:26:3     181.  
#>  7 1993:1:GFR:SOL:H20:27:27     11.6 
#>  8 1993:1:GFR:SOL:H20:28:24   1151.  
#>  9 1993:1:GFR:SOL:H20:29:29     15.4 
#> 10 1993:1:GFR:SOL:H20:30:28      3.42
#> # … with 8,229 more rows

# Yearly proportion of hauls with zero catch for one species and quarter.
#
# .data: long catch data with columns species, quarter, haul.id, density, year
# sp:    species label to keep (also written to the `species` column)
# qtr:   quarter to keep (also written to the `q` column)
#
# Returns one row per year with the number of hauls with catch (N), the number
# of zero-catch hauls (Y), their proportion (prop_z), and the q and species
# labels.
prop_zero_by_year <- function(.data, sp, qtr) {
  .data %>%
    filter(species == sp & quarter == qtr) %>%
    group_by(haul.id) %>%
    mutate(haul_dens = sum(density)) %>%
    distinct(haul.id, .keep_all = TRUE) %>%
    mutate(zero_catch = ifelse(haul_dens == 0, "Y", "N")) %>%
    group_by(year, zero_catch) %>%
    summarise(n = n()) %>%
    ungroup() %>%
    # A year in which one category never occurs has no row for it; fill that
    # count with 0 so prop_z becomes 0 (or 1) instead of NA
    pivot_wider(names_from = zero_catch, values_from = n,
                values_fill = list(n = 0L)) %>%
    mutate(prop_z = Y / (N + Y),
           q = qtr,
           species = sp)
}

codq4 <- prop_zero_by_year(dat, "cod", 4)
#> filter: removed 1,290,361 rows (73%), 469,248 rows remaining
#> group_by: one grouping variable (haul.id)
#> mutate (grouped): new variable 'haul_dens' (double) with 3,260 unique values and 0% NA
#> distinct (grouped): removed 465,582 rows (99%), 3,666 rows remaining
#> mutate (grouped): new variable 'zero_catch' (character) with 2 unique values and 0% NA
#> group_by: 2 grouping variables (year, zero_catch)
#> summarise: now 56 rows and 3 columns, one group variable remaining (year)
#> ungroup: no grouping variables
#> pivot_wider: reorganized (zero_catch, n) into (N, Y) [was 56x3, now 28x3]
#> mutate: new variable 'prop_z' (double) with 27 unique values and 0% NA
#>         new variable 'q' (double) with one unique value and 0% NA
#>         new variable 'species' (character) with one unique value and 0% NA

codq1 <- prop_zero_by_year(dat, "cod", 1)
#> filter: removed 1,043,449 rows (59%), 716,160 rows remaining
#> group_by: one grouping variable (haul.id)
#> mutate (grouped): new variable 'haul_dens' (double) with 4,968 unique values and 0% NA
#> distinct (grouped): removed 710,565 rows (99%), 5,595 rows remaining
#> mutate (grouped): new variable 'zero_catch' (character) with 2 unique values and 0% NA
#> group_by: 2 grouping variables (year, zero_catch)
#> summarise: now 56 rows and 3 columns, one group variable remaining (year)
#> ungroup: no grouping variables
#> pivot_wider: reorganized (zero_catch, n) into (N, Y) [was 56x3, now 28x3]
#> mutate: new variable 'prop_z' (double) with 28 unique values and 0% NA
#>         new variable 'q' (double) with one unique value and 0% NA
#>         new variable 'species' (character) with one unique value and 0% NA

fleq4 <- prop_zero_by_year(dat, "flounder", 4)
#> filter: removed 1,535,795 rows (87%), 223,814 rows remaining
#> group_by: one grouping variable (haul.id)
#> mutate (grouped): new variable 'haul_dens' (double) with 2,983 unique values and 0% NA
#> distinct (grouped): removed 220,157 rows (98%), 3,657 rows remaining
#> mutate (grouped): new variable 'zero_catch' (character) with 2 unique values and 0% NA
#> group_by: 2 grouping variables (year, zero_catch)
#> summarise: now 55 rows and 3 columns, one group variable remaining (year)
#> ungroup: no grouping variables
#> pivot_wider: reorganized (zero_catch, n) into (N, Y) [was 55x3, now 28x3]
#> mutate: new variable 'prop_z' (double) with 27 unique values and 0% NA
#>         new variable 'q' (double) with one unique value and 0% NA
#>         new variable 'species' (character) with one unique value and 0% NA

fleq1 <- prop_zero_by_year(dat, "flounder", 1)
#> filter: removed 1,430,390 rows (81%), 329,219 rows remaining
#> group_by: one grouping variable (haul.id)
#> mutate (grouped): new variable 'haul_dens' (double) with 5,026 unique values and 0% NA
#> distinct (grouped): removed 323,837 rows (98%), 5,382 rows remaining
#> mutate (grouped): new variable 'zero_catch' (character) with 2 unique values and 0% NA
#> group_by: 2 grouping variables (year, zero_catch)
#> summarise: now 56 rows and 3 columns, one group variable remaining (year)
#> ungroup: no grouping variables
#> pivot_wider: reorganized (zero_catch, n) into (N, Y) [was 56x3, now 28x3]
#> mutate: new variable 'prop_z' (double) with 28 unique values and 0% NA
#>         new variable 'q' (double) with one unique value and 0% NA
#>         new variable 'species' (character) with one unique value and 0% NA

# Percentage of zero-catch hauls over time, one line per quarter, one panel per species
ggplot(bind_rows(codq1, codq4, fleq1, fleq4), aes(year, prop_z*100, color = factor(q))) +
  geom_line() +
  facet_wrap(~ species, ncol = 1)


# Save the full catch data as dat_full. For adding the unique covariates, I only
# need the distinct trawl IDs, not the 1.7 million rows...
dat_full <- dat
# NOTE(review): only `density` carries a minus sign, so length_cm, species and
# haul.id.size are kept (taken from the first row of each haul). If the intent
# was to drop all four, the other three need a minus as well -- confirm against
# the later join with dat_full before changing.
dat <- dat %>%
  dplyr::select(-density, length_cm, species, haul.id.size) %>%
  distinct(haul.id, .keep_all = TRUE)
#> distinct: removed 1,750,236 rows (99%), 9,373 rows remaining

# Check 0 catches: the full data must still contain the same cod hauls with a
# non-zero total density
dat_full %>%
  filter(species == "cod") %>%
  group_by(haul.id) %>%
  summarise(haul_dens = sum(density)) %>%
  ungroup() %>%
  filter(haul_dens != 0)
#> filter: removed 559,865 rows (32%), 1,199,744 rows remaining
#> group_by: one grouping variable (haul.id)
#> summarise: now 9,373 rows and 2 columns, ungrouped
#> ungroup: no grouping variables
#> filter: removed 1,134 rows (12%), 8,239 rows remaining
#> # A tibble: 8,239 × 2
#>    haul.id                  haul_dens
#>    <chr>                        <dbl>
#>  1 1993:1:GFR:SOL:H20:21:1      78.6 
#>  2 1993:1:GFR:SOL:H20:22:32      5.13
#>  3 1993:1:GFR:SOL:H20:23:31      2.62
#>  4 1993:1:GFR:SOL:H20:24:30      9.71
#>  5 1993:1:GFR:SOL:H20:25:2     405.  
#>  6 1993:1:GFR:SOL:H20:26:3     181.  
#>  7 1993:1:GFR:SOL:H20:27:27     11.6 
#>  8 1993:1:GFR:SOL:H20:28:24   1151.  
#>  9 1993:1:GFR:SOL:H20:29:29     15.4 
#> 10 1993:1:GFR:SOL:H20:30:28      3.42
#> # … with 8,229 more rows

Add in the environmental variables

Substrate

substrate <- raster("data/substrate_tif/BALANCE_SEABED_SEDIMENT.tif")
#> Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
#> prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in CRS definition
# Reproject to lon/lat. The cell values are sediment class codes (1-5), i.e.
# categorical, so resample with nearest neighbour. The projectRaster() default
# (bilinear) averages neighbouring codes and produces fractional "classes" that
# do not correspond to any real sediment type.
# NOTE: the printed unique() values below were rendered with the bilinear
# default and show those fractional codes; they change on re-rendering.
substrate_longlat <- projectRaster(substrate, crs = "+proj=longlat", method = "ngb")

# Now extract the values from the substrate raster to dat
dat$substrate <- raster::extract(substrate_longlat, dat %>% dplyr::select(lon, lat))

unique(dat$substrate)
#>   [1] 3.000000 5.000000 4.000000 1.138341 4.446301 2.000000 3.734845 1.000000
#>   [9] 4.066592 3.807718 4.438934 4.842565 4.798625 4.840808 4.379877 3.512073
#>  [17] 4.165947 4.706898 1.710033 4.659087 4.950544 3.852555 4.155707 4.992289
#>  [25] 2.727517 3.719923 4.909709 2.064201 4.238063 4.498091 3.251222 4.843821
#>  [33] 2.055089 1.402886 3.429073 4.639466 4.752425 4.038259 2.716163 3.078310
#>  [41] 4.169430 3.024738 2.007402 4.957880 3.838716 3.486158 3.243755 2.137971
#>  [49] 3.622974 2.194785 4.814487 4.018015 4.415854 4.834043 4.054638 4.302037
#>  [57] 4.412596 3.541952 3.573007 4.943159 2.985540 4.102325 4.102802 4.582128
#>  [65] 4.168352 4.367814 4.528939 4.609104 4.982928 2.837596 3.011781 4.468152
#>  [73] 2.205551 2.720424 3.362097 3.161689 4.246033 2.199707 3.268652 2.867899
#>  [81] 3.313120 2.384691 3.727420 4.978446 3.843230 4.533843 3.536023 2.308508
#>  [89] 3.376948 4.676910 2.593988 3.781663 4.239673 3.902180 4.490885 4.648592
#>  [97] 3.349005 4.041116 1.412668 3.606772 3.029200 4.536513 2.296147 3.851118
#> [105] 2.127370 2.571771 3.777866 2.899165 3.694996 3.810013 2.463042 3.845765
#> [113] 2.370193 4.636803 4.278285 4.131986 2.738054 2.370191 2.015659 4.899457
#> [121] 2.856868 4.155565 4.812246 3.535448 2.571619 4.209975 3.443494 3.032781
#> [129] 3.927895 4.980990 3.861495 4.014337 3.928918 2.892701 4.286916 4.202067
#> [137] 4.819160 2.843091 3.753357 2.009381 3.247610 4.384879 2.753052 2.367529
#> [145] 3.372948 4.708170 3.596280 3.048587 2.152307 2.986556 4.651357 3.666669
#> [153] 2.676910 4.866229 3.974051 3.906186 3.112969 3.538048 4.910212 4.506713
#> [161] 2.229670 2.962793 3.628756 3.067581 4.227833 3.424838 4.620548 2.791879
#> [169] 4.778975 4.952352 2.033904 3.983402 2.918824 4.493097 2.271779 4.119308
#> [177] 4.996012 2.588547 4.051414 3.318623 3.480814 4.873419 4.355332 3.508960
#> [185] 3.797020 4.127856 4.470470 4.457530 3.552718 3.438254 4.525368 4.900706
#> [193] 2.044159 4.212301 4.011298 3.523882 3.060409 4.991976 3.245151

# Plot the extracted substrate codes at the haul positions
dat %>%
  ggplot(aes(x = lon, y = lat, colour = substrate)) +
  geom_point()


# Which classes occur once the codes are rounded to whole numbers?
dat$substrate %>% round() %>% unique() %>% sort() %>% factor()
#> [1] 1 2 3 4 5
#> Levels: 1 2 3 4 5

dat$substrate <- round(dat$substrate)

# Translate the class codes to names (see the legend below); any other value is
# kept as its character representation
dat <- dat %>%
  mutate(substrate = case_when(
    substrate == 1 ~ "bedrock",
    substrate == 2 ~ "hard-bottom complex",
    substrate == 3 ~ "sand",
    substrate == 4 ~ "hard clay",
    substrate == 5 ~ "mud",
    TRUE ~ as.character(substrate)
  ))
#> mutate: converted 'substrate' from double to character (0 new NA)
# I. Bedrock.
# II. Hard bottom complex, includes patchy hard surfaces and coarse sand (sometimes also clay) to boulders.
# III. Sand including fine to coarse sand (with gravel exposures).
# IV. Hard clay sometimes/often/possibly exposed or covered with a thin layer of
# sand/gravel.
# V. Mud including gyttja-clay to gyttja-silt.

# Plot again, now coloured by the named substrate class
dat %>%
  ggplot(aes(x = lon, y = lat, colour = substrate)) +
  geom_point()

Depth

# Read the tifs
west <- raster("data/depth_geo_tif/D5_2018_rgb-1.tif")
#plot(west)

east <- raster("data/depth_geo_tif/D6_2018_rgb-1.tif")
# plot(east)

# Combine the western and eastern tiles into one raster
dep_rast <- raster::merge(west, east)

# Raster value at each haul position (explicit namespace: tidyr also exports
# an extract())
dat$depth <- raster::extract(dep_rast, dat %>% dplyr::select(lon, lat))

# Distribution of the raw raster values
ggplot(dat, aes(depth)) + geom_histogram()
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Convert to depth (instead of elevation): shift so that the largest raster
# value becomes 0 and flip the sign, so smaller raster values become larger
# positive depths.
# na.rm = TRUE: without it a single haul outside the raster (NA) would turn
# every depth into NA.
# NOTE(review): the units of the raster values are not visible here -- confirm.
dat$depth <- (dat$depth - max(dat$depth, na.rm = TRUE)) * -1
ggplot(dat, aes(depth)) + geom_histogram()
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Oxygen

# Downloaded from here: https://resources.marine.copernicus.eu/?option=com_csw&view=details&product_id=BALTICSEA_REANALYSIS_BIO_003_012
# Extract raster points: https://gisday.wordpress.com/2014/03/24/extract-raster-values-from-points-using-r/comment-page-1/
# https://rpubs.com/boyerag/297592
# https://pjbartlein.github.io/REarthSysSci/netCDF.html#get-a-variable
# Open the netCDF file
# NOTE(review): no nc_close(ncin) is visible in this part of the script --
# confirm the connection is closed once all variables have been read.
ncin <- nc_open(
  "data/NEMO_Nordic_SCOBI/dataset-reanalysis-scobi-monthlymeans_1664182224542.nc"
)

# Print the file summary (variables, dimensions, global attributes)
print(ncin)
#> File data/NEMO_Nordic_SCOBI/dataset-reanalysis-scobi-monthlymeans_1664182224542.nc (NC_FORMAT_CLASSIC):
#> 
#>      1 variables (excluding dimension variables):
#>         float o2b[longitude,latitude,time]   
#>             long_name: Sea_floor_Dissolved_Oxygen_Concentration
#>             missing_value: NaN
#>             standard_name: mole_concentration_of_dissolved_molecular_oxygen_in_sea_water
#>             units: mmol m-3
#>             _FillValue: NaN
#>             _ChunkSizes: 1
#>              _ChunkSizes: 523
#>              _ChunkSizes: 383
#> 
#>      3 dimensions:
#>         time  Size:336
#>             axis: T
#>             long_name: Validity time
#>             standard_name: time
#>             units: days since 1950-01-01 00:00:00
#>             calendar: gregorian
#>             _ChunkSizes: 512
#>             _CoordinateAxisType: Time
#>             valid_min: 15721.5
#>             valid_max: 25917.5
#>         latitude  Size:523
#>             axis: Y
#>             standard_name: latitude
#>             long_name: latitude
#>             units: degrees_north
#>             _CoordinateAxisType: Lat
#>             valid_min: 48.49169921875
#>             valid_max: 65.8914184570312
#>         longitude  Size:383
#>             standard_name: longitude
#>             long_name: longitude
#>             units: degrees_east
#>             axis: X
#>             _CoordinateAxisType: Lon
#>             valid_min: 9.01375484466553
#>             valid_max: 30.2357654571533
#> 
#>     24 global attributes:
#>         references: http://www.smhi.se
#>         institution: Swedish Meterological and Hydrological Institute
#>         history: See source and creation_date attributees
#>         Conventions: CF-1.5
#>         contact: servicedesk_cmems@mercator-ocean.eu
#>         comment: Provided by SMHI as a Copernicus Marine Environment Monitoring Service production unit
#>         bullentin_type: reanalysis
#>         cmems_product_id: BALTICSEA_REANALYSIS_BIO_003_012
#>         title: CMEMS V4 Reanalysis: SCOBI model 3D fields (monthly means)
#>         FROM_ORIGINAL_FILE__easternmost_longitude: 30.2357654571533
#>         FROM_ORIGINAL_FILE__northernmost_latitude: 65.8914184570312
#>         FROM_ORIGINAL_FILE__westernmost_longitude: 9.01375484466553
#>         FROM_ORIGINAL_FILE__southernmost_latitude: 48.49169921875
#>         shallowest_depth: 1.50136542320251
#>         deepest_depth: 711.059204101562
#>         source: SMHI reanalysis run NORDIC-NS2_1d_20201201_20201201
#>         file_quality_index: 1
#>         creation_date: 2021-11-09 UTC
#>         bullentin_date: 20201201
#>         start_date: 2020-12-01 UTC
#>         stop_date: 2020-12-01 UTC
#>         start_time: 00:00 UTC
#>         stop_time: 00:00 UTC
#>         _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention

# Get longitude and latitude (the coordinate vectors of the grid)
lon <- ncvar_get(ncin, varid = "longitude")
nlon <- dim(lon)
head(lon)
#> [1] 9.013755 9.069310 9.124865 9.180420 9.235975 9.291530

lat <- ncvar_get(ncin, varid = "latitude")
nlat <- dim(lat)
head(lat)
#> [1] 48.49170 48.52503 48.55836 48.59170 48.62503 48.65836

# Get time (numeric offsets; the unit is read from the time attributes further down)
time <- ncvar_get(ncin, varid = "time")
time
#>   [1] 15721.5 15751.0 15780.5 15811.0 15841.5 15872.0 15902.5 15933.5 15964.0
#>  [10] 15994.5 16025.0 16055.5 16086.5 16116.0 16145.5 16176.0 16206.5 16237.0
#>  [19] 16267.5 16298.5 16329.0 16359.5 16390.0 16420.5 16451.5 16481.0 16510.5
#>  [28] 16541.0 16571.5 16602.0 16632.5 16663.5 16694.0 16724.5 16755.0 16785.5
#>  [37] 16816.5 16846.5 16876.5 16907.0 16937.5 16968.0 16998.5 17029.5 17060.0
#>  [46] 17090.5 17121.0 17151.5 17182.5 17212.0 17241.5 17272.0 17302.5 17333.0
#>  [55] 17363.5 17394.5 17425.0 17455.5 17486.0 17516.5 17547.5 17577.0 17606.5
#>  [64] 17637.0 17667.5 17698.0 17728.5 17759.5 17790.0 17820.5 17851.0 17881.5
#>  [73] 17912.5 17942.0 17971.5 18002.0 18032.5 18063.0 18093.5 18124.5 18155.0
#>  [82] 18185.5 18216.0 18246.5 18277.5 18307.5 18337.5 18368.0 18398.5 18429.0
#>  [91] 18459.5 18490.5 18521.0 18551.5 18582.0 18612.5 18643.5 18673.0 18702.5
#> [100] 18733.0 18763.5 18794.0 18824.5 18855.5 18886.0 18916.5 18947.0 18977.5
#> [109] 19008.5 19038.0 19067.5 19098.0 19128.5 19159.0 19189.5 19220.5 19251.0
#> [118] 19281.5 19312.0 19342.5 19373.5 19403.0 19432.5 19463.0 19493.5 19524.0
#> [127] 19554.5 19585.5 19616.0 19646.5 19677.0 19707.5 19738.5 19768.5 19798.5
#> [136] 19829.0 19859.5 19890.0 19920.5 19951.5 19982.0 20012.5 20043.0 20073.5
#> [145] 20104.5 20134.0 20163.5 20194.0 20224.5 20255.0 20285.5 20316.5 20347.0
#> [154] 20377.5 20408.0 20438.5 20469.5 20499.0 20528.5 20559.0 20589.5 20620.0
#> [163] 20650.5 20681.5 20712.0 20742.5 20773.0 20803.5 20834.5 20864.0 20893.5
#> [172] 20924.0 20954.5 20985.0 21015.5 21046.5 21077.0 21107.5 21138.0 21168.5
#> [181] 21199.5 21229.5 21259.5 21290.0 21320.5 21351.0 21381.5 21412.5 21443.0
#> [190] 21473.5 21504.0 21534.5 21565.5 21595.0 21624.5 21655.0 21685.5 21716.0
#> [199] 21746.5 21777.5 21808.0 21838.5 21869.0 21899.5 21930.5 21960.0 21989.5
#> [208] 22020.0 22050.5 22081.0 22111.5 22142.5 22173.0 22203.5 22234.0 22264.5
#> [217] 22295.5 22325.0 22354.5 22385.0 22415.5 22446.0 22476.5 22507.5 22538.0
#> [226] 22568.5 22599.0 22629.5 22660.5 22690.5 22720.5 22751.0 22781.5 22812.0
#> [235] 22842.5 22873.5 22904.0 22934.5 22965.0 22995.5 23026.5 23056.0 23085.5
#> [244] 23116.0 23146.5 23177.0 23207.5 23238.5 23269.0 23299.5 23330.0 23360.5
#> [253] 23391.5 23421.0 23450.5 23481.0 23511.5 23542.0 23572.5 23603.5 23634.0
#> [262] 23664.5 23695.0 23725.5 23756.5 23786.0 23815.5 23846.0 23876.5 23907.0
#> [271] 23937.5 23968.5 23999.0 24029.5 24060.0 24090.5 24121.5 24151.5 24181.5
#> [280] 24212.0 24242.5 24273.0 24303.5 24334.5 24365.0 24395.5 24426.0 24456.5
#> [289] 24487.5 24517.0 24546.5 24577.0 24607.5 24638.0 24668.5 24699.5 24730.0
#> [298] 24760.5 24791.0 24821.5 24852.5 24882.0 24911.5 24942.0 24972.5 25003.0
#> [307] 25033.5 25064.5 25095.0 25125.5 25156.0 25186.5 25217.5 25247.0 25276.5
#> [316] 25307.0 25337.5 25368.0 25398.5 25429.5 25460.0 25490.5 25521.0 25551.5
#> [325] 25582.5 25612.5 25642.5 25673.0 25703.5 25734.0 25764.5 25795.5 25826.0
#> [334] 25856.5 25887.0 25917.5

# Unit string of the time axis and the number of time steps
tunits <- ncatt_get(ncin, varid = "time", attname = "units")
nt <- dim(time)
nt
#> [1] 336
tunits
#> $hasatt
#> [1] TRUE
#> 
#> $value
#> [1] "days since 1950-01-01 00:00:00"

# Get oxygen: o2b is the sea-floor dissolved oxygen variable in this file
dname <- "o2b"

# The data array plus the variable's descriptive attributes
oxy_array <- ncvar_get(ncin, varid = dname)
dlname <- ncatt_get(ncin, varid = dname, attname = "long_name")
dunits <- ncatt_get(ncin, varid = dname, attname = "units")
fillvalue <- ncatt_get(ncin, varid = dname, attname = "_FillValue")
# Dimensions are longitude x latitude x time
dim(oxy_array)
#> [1] 383 523 336

# Get global attributes (varid = 0 addresses the file-level attributes)
title <- ncatt_get(ncin, varid = 0, attname = "title")
institution <- ncatt_get(ncin, varid = 0, attname = "institution")
datasource <- ncatt_get(ncin, varid = 0, attname = "source")
references <- ncatt_get(ncin, varid = 0, attname = "references")
history <- ncatt_get(ncin, varid = 0, attname = "history")
Conventions <- ncatt_get(ncin, varid = 0, attname = "Conventions")

# Convert time: split the time units string ("days since YYYY-MM-DD hh:mm:ss")
# into fields; the third word is the origin date
tustr <- strsplit(tunits$value, " ")
tdstr <- strsplit(unlist(tustr)[3], "-")
tmonth <- as.integer(unlist(tdstr)[2])
tday <- as.integer(unlist(tdstr)[3])
tyear <- as.integer(unlist(tdstr)[1])

# Here I deviate from the guide a little bit. Save this info:
dates <- chron(time, origin = c(tmonth, tday, tyear))

# Calendar month and four-digit year of every time step.
# These are derived with base Date arithmetic rather than by cutting characters
# out of the printed chron string: the printed form depends on chron's display
# options and only holds a two-digit year, which then needs a century guess.
time_as_date <- as.Date(
  floor(as.vector(time)),
  origin = as.Date(sprintf("%04d-%02d-%02d", tyear, tmonth, tday))
)
months <- as.numeric(format(time_as_date, "%m"))
years <- as.numeric(format(time_as_date, "%Y"))

# Replace netCDF fill values with NA's.
# ncatt_get() returns hasatt = FALSE when the attribute is missing, so only act
# when a fill value is actually declared. A NaN fill value (as in this file)
# cannot be matched with `==` because the comparison is always NA, so use
# is.nan() for that case.
if (isTRUE(fillvalue$hasatt)) {
  if (is.na(fillvalue$value)) {
    oxy_array[is.nan(oxy_array)] <- NA
  } else {
    oxy_array[which(oxy_array == fillvalue$value)] <- NA
  }
}

# Next, we need to work with the months that correspond to the quarters that we use.
# The time steps of those months are selected and averaged within each year.
# First look at the month of every time step:
months
#>   [1]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1
#>  [26]  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2
#>  [51]  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3
#>  [76]  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4
#> [101]  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5
#> [126]  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6
#> [151]  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7
#> [176]  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8
#> [201]  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9
#> [226] 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10
#> [251] 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11
#> [276] 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12
#> [301]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1
#> [326]  2  3  4  5  6  7  8  9 10 11 12

# Quarter 1 = months 1-3: positions of those time steps, the matching slices of
# the oxygen array, and their month and year labels
index_keep_q1 <- which(months %in% 1:3)
oxy_q1 <- oxy_array[, , index_keep_q1]
months_keep_q1 <- months[index_keep_q1]
years_keep_q1 <- years[index_keep_q1]

# Quarter 4 = months 10-12, same objects
index_keep_q4 <- which(months %in% 10:12)
oxy_q4 <- oxy_array[, , index_keep_q4]
months_keep_q4 <- months[index_keep_q4]
years_keep_q4 <- years[index_keep_q4]

# Now we have one array per quarter, holding three monthly layers per year.
# We need to average those three layers within each year.
# Start position of each year's block of three layers: 1, 4, 7, ...
loop_seq_q1 <- seq(1, dim(oxy_q1)[3], by = 3)
loop_seq_q4 <- seq(1, dim(oxy_q4)[3], by = 3)

# The loop below indexes BOTH arrays with the Q1 sequence. That is only valid
# when both quarters hold the same number of complete (three-month) years.
stopifnot(
  dim(oxy_q1)[3] %% 3 == 0,
  dim(oxy_q1)[3] == dim(oxy_q4)[3]
)

# Create objects that will hold data: one (preallocated) list element per year
dlist_q1 <- vector("list", length(loop_seq_q1))
dlist_q4 <- vector("list", length(loop_seq_q4))

# Now average by quarter. For the first i (= 1) the layers 1, 2, 3 are used,
# for the next i (= 4) the layers 4, 5, 6, and so on.
for (i in loop_seq_q1) {

  # The three monthly layers of this year, per quarter
  oxy_1 <- oxy_q1[, , i]
  oxy_2 <- oxy_q1[, , i + 1]
  oxy_3 <- oxy_q1[, , i + 2]

  oxy_10 <- oxy_q4[, , i]
  oxy_11 <- oxy_q4[, , i + 1]
  oxy_12 <- oxy_q4[, , i + 2]

  # Cell-wise mean of the three months (NA in any month gives NA)
  oxy_ave_q1 <- (oxy_1 + oxy_2 + oxy_3) / 3
  oxy_ave_q4 <- (oxy_10 + oxy_11 + oxy_12) / 3

  # Position 1:n(years) in the result lists. Integer division gives an exact
  # index; (i / 3) - (1 / 3) + 1 depends on floating-point rounding, and a
  # result just below a whole number would be truncated to the previous year.
  list_pos_q1 <- (i - 1) %/% 3 + 1
  list_pos_q4 <- list_pos_q1

  dlist_q1[[list_pos_q1]] <- oxy_ave_q1
  dlist_q4[[list_pos_q4]] <- oxy_ave_q4

}

# Now name the lists with the year:
names(dlist_q1) <- unique(years_keep_q1)
names(dlist_q4) <- unique(years_keep_q4)

# Now I need to make a loop where I extract the raster value for each year...
# The cpue data is called dat so far in this script

# Per quarter, keep only the hauls from years I have an oxygen layer for
d_sub_oxy_q1 <- dat %>%
  filter(quarter == 1) %>%
  filter(year %in% names(dlist_q1)) %>%
  droplevels()
#> filter: removed 3,778 rows (40%), 5,595 rows remaining
#> filter: no rows removed
d_sub_oxy_q4 <- dat %>%
  filter(quarter == 4) %>%
  filter(year %in% names(dlist_q4)) %>%
  droplevels()
#> filter: removed 5,707 rows (61%), 3,666 rows remaining
#> filter: no rows removed

# Empty holders for the per-year extracted data ...
oxy_data_list_q1 <- list()
oxy_data_list_q4 <- list()

# ... and for the per-year oxygen rasters
raster_list_q1 <- list()
raster_list_q4 <- list()

# Year as a factor, used to index the year-named lists in the loop
d_sub_oxy_q1$year_f <- factor(d_sub_oxy_q1$year)
d_sub_oxy_q4$year_f <- factor(d_sub_oxy_q4$year)

# Loop through each year and extract raster values for the cpue data points.
# For every year and quarter this: (1) builds an oxygen raster from that year's
# slice, (2) extracts the raster value at each cpue coordinate, (3) stores the
# point values and the raster cells in the year-indexed lists, and (4) saves a
# map of raster + points so each year can be checked by eye.

# Set plot limits (the same for every year, so defined once)
ymin <- 54; ymax <- 58; xmin <- 12; xmax <- 22

# Now the unit of oxygen is mmol/m3. I want it to be ml/L. The original model is in unit ml/L
# and it's been converted by the data host. Since it was converted without accounting for
# pressure or temperature, I can simply use the following conversion factor:
# 1 ml/l = 10^3/22.391 = 44.661 μmol/l -> 1 ml/l = 0.044661 mmol/l = 44.661 mmol/m^3
# -> 1 mmol/m^3 = 0.0223909 ml/l
# https://ocean.ices.dk/tools/unitconversion.aspx
mmol_m3_to_ml_l <- 0.0223909

# Helper: turn one yearly oxygen matrix into a raster spanning the lon/lat grid
# (uses the global lon and lat vectors), then flip it in the y direction.
oxy_matrix_to_raster <- function(oxy_slice) {
  # The original CRS string ended in "+no_defs+ towgs84=0,0,0", which is not a
  # well-formed set of PROJ parameters; plain WGS84 lon/lat is used instead.
  r <- raster(t(oxy_slice), xmn = min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat),
              crs = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))
  flip(r, direction = "y")
}

# Helper: map of the oxygen raster (df_rast: Longitude, Latitude, oxy) with the
# cpue points (d_slice: lon, lat, oxy) on top, both on the same fill scale.
# Returns the ggplot object so it can be passed to ggsave() explicitly.
plot_oxy_map <- function(df_rast, d_slice) {
  ggplot(data = df_rast, aes(y = Latitude, x = Longitude)) +
    geom_raster(aes(fill = oxy)) +
    geom_point(data = d_slice, aes(x = lon, y = lat, fill = oxy),
               color = "black", size = 5, shape = 21) +
    theme_bw() +
    geom_sf(data = world, inherit.aes = FALSE, size = 0.2) +
    coord_sf(xlim = c(xmin, xmax),
             ylim = c(ymin, ymax)) +
    scale_colour_gradientn(colours = rev(terrain.colors(10)),
                           limits = c(-5, 11)) +
    scale_fill_gradientn(colours = rev(terrain.colors(10)),
                         limits = c(-5, 11))
}

# i is the year as a character string (a for loop over a factor yields its labels)
# We use the q1 years as looping index; this assumes q4 covers the same years.
for(i in unique(d_sub_oxy_q1$year_f)) {

  # Create the (flipped) raster for that year (i); the slices are looked up by name
  r_q1 <- oxy_matrix_to_raster(dlist_q1[[i]])
  r_q4 <- oxy_matrix_to_raster(dlist_q4[[i]])

  plot(r_q1, main = paste(i, "Q1"))
  plot(r_q4, main = paste(i, "Q4"))

  # Filter the same year (i) in the cpue data and select only coordinates
  d_slice_q1 <- d_sub_oxy_q1 %>% filter(year_f == i) %>% dplyr::select(lon, lat)
  d_slice_q4 <- d_sub_oxy_q4 %>% filter(year_f == i) %>% dplyr::select(lon, lat)

  # Extract the raster value (oxygen) at each cpue coordinate and add it to the
  # data frame holding the coordinates, converted from mmol/m3 to ml/L
  d_slice_q1$oxy <- raster::extract(r_q1, SpatialPoints(d_slice_q1)) * mmol_m3_to_ml_l
  d_slice_q4$oxy <- raster::extract(r_q4, SpatialPoints(d_slice_q4)) * mmol_m3_to_ml_l

  # Add in which year
  d_slice_q1$year <- i
  d_slice_q4$year <- i

  # Create an index for the data lists where we store all years. Our loop index
  # i is the year itself, not 1:n, so subtract 1992 to get 1 for 1993, 2 for 1994
  # and so on. Taken from i directly so it is defined even for an empty slice.
  index_q1 <- index_q4 <- as.numeric(i) - 1992

  # Add each years' data in the list
  oxy_data_list_q1[[index_q1]] <- d_slice_q1
  oxy_data_list_q4[[index_q4]] <- d_slice_q4

  # Convert the raster to points and then to a data frame (so that we can use
  # ggplot), rename the value column and add the year
  df_rast_q1 <- data.frame(rasterToPoints(r_q1)) %>% rename("oxy" = "layer") %>% mutate(year = i)
  df_rast_q4 <- data.frame(rasterToPoints(r_q4)) %>% rename("oxy" = "layer") %>% mutate(year = i)

  # Add each years' raster data frame in the list.
  # NOTE(review): these stored cells are still in mmol/m3 (the ml/L conversion
  # below is only applied to the plotting copy), whereas oxy_data_list_* is in
  # ml/L. Kept as is to not change downstream values -- confirm this is intended.
  raster_list_q1[[index_q1]] <- df_rast_q1
  raster_list_q4[[index_q4]] <- df_rast_q4

  # Make appropriate column headings for plotting. Only the two coordinate
  # columns are renamed, so the oxy and year columns keep their names.
  colnames(df_rast_q1)[1:2] <- c("Longitude", "Latitude")
  colnames(df_rast_q4)[1:2] <- c("Longitude", "Latitude")

  # Change unit to ml/L
  df_rast_q1$oxy <- df_rast_q1$oxy * mmol_m3_to_ml_l
  df_rast_q4$oxy <- df_rast_q4$oxy * mmol_m3_to_ml_l

  # Save to check each year is ok! Make and save a map for q1 ...
  p_q1 <- plot_oxy_map(df_rast_q1, d_slice_q1)

  ggsave(paste0("figures/supp/cpue_oxygen_rasters/", i, "q1.png"), plot = p_q1,
         width = 6.5, height = 6.5, dpi = 600)

  # ... and for q4
  p_q4 <- plot_oxy_map(df_rast_q4, d_slice_q4)

  ggsave(paste0("figures/supp/cpue_oxygen_rasters/", i, "q4.png"), plot = p_q4,
         width = 6.5, height = 6.5, dpi = 600)

}

#> filter: removed 5,494 rows (98%), 101 rows remaining
#> filter: removed 3,605 rows (98%), 61 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,420 rows (97%), 175 rows remaining
#> filter: removed 3,604 rows (98%), 62 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,441 rows (97%), 154 rows remaining
#> filter: removed 3,613 rows (99%), 53 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,412 rows (97%), 183 rows remaining
#> filter: removed 3,605 rows (98%), 61 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,381 rows (96%), 214 rows remaining
#> filter: removed 3,591 rows (98%), 75 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,367 rows (96%), 228 rows remaining
#> filter: removed 3,598 rows (98%), 68 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,405 rows (97%), 190 rows remaining
#> filter: removed 3,572 rows (97%), 94 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,485 rows (98%), 110 rows remaining
#> filter: removed 3,577 rows (98%), 89 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,387 rows (96%), 208 rows remaining
#> filter: removed 3,550 rows (97%), 116 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,450 rows (97%), 145 rows remaining
#> filter: removed 3,549 rows (97%), 117 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,434 rows (97%), 161 rows remaining
#> filter: removed 3,538 rows (97%), 128 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,388 rows (96%), 207 rows remaining
#> filter: removed 3,557 rows (97%), 109 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,377 rows (96%), 218 rows remaining
#> filter: removed 3,515 rows (96%), 151 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,407 rows (97%), 188 rows remaining
#> filter: removed 3,516 rows (96%), 150 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,360 rows (96%), 235 rows remaining
#> filter: removed 3,497 rows (95%), 169 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,373 rows (96%), 222 rows remaining
#> filter: removed 3,490 rows (95%), 176 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,336 rows (95%), 259 rows remaining
#> filter: removed 3,486 rows (95%), 180 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,355 rows (96%), 240 rows remaining
#> filter: removed 3,504 rows (96%), 162 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,352 rows (96%), 243 rows remaining
#> filter: removed 3,486 rows (95%), 180 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,371 rows (96%), 224 rows remaining
#> filter: removed 3,533 rows (96%), 133 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,327 rows (95%), 268 rows remaining
#> filter: removed 3,518 rows (96%), 148 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,387 rows (96%), 208 rows remaining
#> filter: removed 3,490 rows (95%), 176 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,358 rows (96%), 237 rows remaining
#> filter: removed 3,500 rows (95%), 166 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,363 rows (96%), 232 rows remaining
#> filter: removed 3,453 rows (94%), 213 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,350 rows (96%), 245 rows remaining
#> filter: removed 3,441 rows (94%), 225 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,339 rows (95%), 256 rows remaining
#> filter: removed 3,456 rows (94%), 210 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,477 rows (98%), 118 rows remaining
#> filter: removed 3,574 rows (97%), 92 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,469 rows (98%), 126 rows remaining
#> filter: removed 3,564 rows (97%), 102 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA


# Stack the yearly point data into one data frame per quarter, then combine the
# two quarters and record which quarter each row belongs to
big_dat_oxy_q1 <- dplyr::bind_rows(oxy_data_list_q1)
big_dat_oxy_q4 <- dplyr::bind_rows(oxy_data_list_q4)
big_dat_oxy <- bind_rows(
  big_dat_oxy_q1 %>% mutate(quarter = 1),
  big_dat_oxy_q4 %>% mutate(quarter = 4)
)
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA

# Same for the yearly raster cell data frames
big_raster_dat_oxy_q1 <- dplyr::bind_rows(raster_list_q1)
big_raster_dat_oxy_q4 <- dplyr::bind_rows(raster_list_q4)
big_raster_dat_oxy <- bind_rows(
  big_raster_dat_oxy_q1 %>% mutate(quarter = 1),
  big_raster_dat_oxy_q4 %>% mutate(quarter = 4)
)
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA

# Mean oxygen over all raster cells per quarter and year, with a linear trend per
# quarter. Looks like there's big inter-annual variation but a negative trend
# over time.
# NOTE(review): big_raster_dat_oxy comes from raster_list_*, which is filled
# before the ml/L conversion in the yearly loop, so mean_oxy is presumably in
# mmol/m3 here -- confirm.
big_raster_dat_oxy %>%
  group_by(quarter, year) %>%
  drop_na(oxy) %>%
  summarise(mean_oxy = mean(oxy)) %>%
  mutate(year_num = as.numeric(year)) %>%
  ggplot(aes(x = year_num, y = mean_oxy)) +
  geom_point(size = 2) +
  stat_smooth(method = "lm") +
  facet_wrap(~ quarter)
#> group_by: 2 grouping variables (quarter, year)
#> drop_na (grouped): no rows removed
#> summarise: now 56 rows and 3 columns, one group variable remaining (quarter)
#> mutate (grouped): new variable 'year_num' (double) with 28 unique values and 0% NA
#> `geom_smooth()` using formula 'y ~ x'


# Number of raster cells with negative oxygen ("dead" cells) per quarter and
# year, with a linear trend per quarter. The counts are summarised to one row
# per quarter-year, so each count is plotted once and the trend line is fitted
# to one point per year rather than to one (identical) row per negative cell.
# Quarter-years without any negative cell do not appear in the plot.
big_raster_dat_oxy %>%
  group_by(quarter, year) %>%
  drop_na(oxy) %>%
  filter(oxy < 0) %>%
  summarise(n = n()) %>%
  mutate(year_num = as.numeric(year)) %>%
  ggplot(., aes(year_num, n)) +
  geom_point(size = 2) +
  stat_smooth(method = "lm") +
  facet_wrap(~ quarter) +
  NULL
#> group_by: 2 grouping variables (quarter, year)
#> drop_na (grouped): no rows removed
#> mutate (grouped): new variable 'dead' (character) with 2 unique values and 0% NA
#> filter (grouped): removed 1,873,405 rows (96%), 84,075 rows remaining
#> mutate (grouped): new variable 'n' (integer) with 56 unique values and 0% NA
#>                   new variable 'year_num' (double) with 28 unique values and 0% NA
#> `geom_smooth()` using formula 'y ~ x'


# Now add in the new oxygen column in the original data. Before merging, inspect
# the structure of the objects involved, starting with the quarter 1 cpue subset:
str(d_sub_oxy_q1)
#> tibble [5,595 × 17] (S3: tbl_df/tbl/data.frame)
#>  $ year           : int [1:5595] 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 ...
#>  $ lat            : num [1:5595] 54.7 54.5 54.5 54.5 54.7 ...
#>  $ lon            : num [1:5595] 13 14.3 14.2 14 13.1 ...
#>  $ quarter        : int [1:5595] 1 1 1 1 1 1 1 1 1 1 ...
#>  $ Country        : chr [1:5595] "GFR" "GFR" "GFR" "GFR" ...
#>  $ Month          : int [1:5595] 2 2 2 2 2 2 2 2 2 2 ...
#>  $ haul.id        : chr [1:5595] "1993:1:GFR:SOL:H20:21:1" "1993:1:GFR:SOL:H20:22:32" "1993:1:GFR:SOL:H20:23:31" "1993:1:GFR:SOL:H20:24:30" ...
#>  $ IDx            : chr [1:5595] "1993.1.GFR.06S1.H20.21.1" "1993.1.GFR.06S1.H20.22.32" "1993.1.GFR.06S1.H20.23.31" "1993.1.GFR.06S1.H20.24.30" ...
#>  $ ices_rect      : chr [1:5595] "38G3" "38G4" "38G4" "37G3" ...
#>  $ sub_div        : chr [1:5595] "24" "24" "24" "24" ...
#>  $ length_cm      : num [1:5595] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ id_haul_stomach: chr [1:5595] NA NA NA NA ...
#>  $ species        : chr [1:5595] "cod" "cod" "cod" "cod" ...
#>  $ haul.id.size   : chr [1:5595] "1993:1:GFR:SOL:H20:21:1.0" "1993:1:GFR:SOL:H20:22:32.0" "1993:1:GFR:SOL:H20:23:31.0" "1993:1:GFR:SOL:H20:24:30.0" ...
#>  $ substrate      : chr [1:5595] "sand" "sand" "sand" "sand" ...
#>  $ depth          : num [1:5595] 9 7 8 6 13 15 11 15 10 10 ...
#>  $ year_f         : Factor w/ 28 levels "1993","1994",..: 1 1 1 1 1 1 1 1 1 1 ...
# Structure of the quarter 4 cpue subset
str(d_sub_oxy_q4)
#> tibble [3,666 × 17] (S3: tbl_df/tbl/data.frame)
#>  $ year           : int [1:3666] 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 ...
#>  $ lat            : num [1:3666] 54.7 54.5 54.5 54.5 54.7 ...
#>  $ lon            : num [1:3666] 13.1 14.3 14.2 14 13.1 ...
#>  $ quarter        : int [1:3666] 4 4 4 4 4 4 4 4 4 4 ...
#>  $ Country        : chr [1:3666] "GFR" "GFR" "GFR" "GFR" ...
#>  $ Month          : int [1:3666] 12 11 11 11 12 12 11 11 11 11 ...
#>  $ haul.id        : chr [1:3666] "1993:4:GFR:SOL:H20:21:65" "1993:4:GFR:SOL:H20:22:43" "1993:4:GFR:SOL:H20:23:44" "1993:4:GFR:SOL:H20:24:46" ...
#>  $ IDx            : chr [1:3666] "1993.4.GFR.06S1.H20.21.65" "1993.4.GFR.06S1.H20.22.43" "1993.4.GFR.06S1.H20.23.44" "1993.4.GFR.06S1.H20.24.46" ...
#>  $ ices_rect      : chr [1:3666] "38G3" "38G4" "38G4" "37G4" ...
#>  $ sub_div        : chr [1:3666] "24" "24" "24" "24" ...
#>  $ length_cm      : num [1:3666] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ id_haul_stomach: chr [1:3666] NA NA NA NA ...
#>  $ species        : chr [1:3666] "cod" "cod" "cod" "cod" ...
#>  $ haul.id.size   : chr [1:3666] "1993:4:GFR:SOL:H20:21:65.0" "1993:4:GFR:SOL:H20:22:43.0" "1993:4:GFR:SOL:H20:23:44.0" "1993:4:GFR:SOL:H20:24:46.0" ...
#>  $ substrate      : chr [1:3666] "sand" "sand" "sand" "sand" ...
#>  $ depth          : num [1:3666] 8 7 8 7 16 17 11 18 10 10 ...
#>  $ year_f         : Factor w/ 28 levels "1993","1994",..: 1 1 1 1 1 1 1 1 1 1 ...
# Structure of the combined (both quarters) oxygen values extracted at the cpue points
str(big_dat_oxy)
#> tibble [9,261 × 5] (S3: tbl_df/tbl/data.frame)
#>  $ lon    : num [1:9261] 13 14.3 14.2 14 13.1 ...
#>  $ lat    : num [1:9261] 54.7 54.5 54.5 54.5 54.7 ...
#>  $ oxy    : num [1:9261] 8.49 8.89 8.9 8.89 8.45 ...
#>  $ year   : chr [1:9261] "1993" "1993" "1993" "1993" ...
#>  $ quarter: num [1:9261] 1 1 1 1 1 1 1 1 1 1 ...

# Build a matching key of the form year_quarter_lon_lat in both the cpue data
# and the extracted oxygen data
dat$id_oxy <- with(dat, paste(year, quarter, lon, lat, sep = "_"))
big_dat_oxy$id_oxy <- with(big_dat_oxy, paste(year, quarter, lon, lat, sep = "_"))

# Which id's in the cpue data (dat) are NOT in the oxygen data? Start by
# comparing the ids by eye: the first 100 ids in the cpue data ...
head(dat$id_oxy, 100)
#>   [1] "1993_1_13_54.6833"      "1993_1_14.2667_54.5167" "1993_1_14.15_54.5167"  
#>   [4] "1993_1_13.9833_54.4833" "1993_1_13.0833_54.7333" "1993_1_13.1833_54.7"   
#>   [7] "1993_1_13.8_54.55"      "1993_1_14.1167_54.7"    "1993_1_14.0833_54.5833"
#>  [10] "1993_1_13.8667_54.5833" "1993_1_14.15_54.6333"   "1993_1_13.3667_54.7333"
#>  [13] "1993_1_13.5167_54.7333" "1993_1_13.6333_54.6833" "1993_1_13.7333_54.6667"
#>  [16] "1993_1_13.85_54.7"      "1993_1_13.6667_54.7667" "1993_1_14.0167_54.85"  
#>  [19] "1993_1_13.8833_54.9667" "1993_1_13.6167_54.9167" "1993_1_13.2167_54.8667"
#>  [22] "1993_1_13.1_54.9333"    "1993_1_13.3167_54.9833" "1993_1_13.4333_55.0333"
#>  [25] "1993_1_13.75_55.0667"   "1993_1_14.1667_55.1333" "1993_1_14.2667_55.1667"
#>  [28] "1993_1_14.2833_55.0667" "1993_1_14.2333_55"      "1993_1_13.6167_55.05"  
#>  [31] "1993_1_13.8167_55.0833" "1993_1_16.6_54.8667"    "1993_1_15.1167_54.3"   
#>  [34] "1993_1_16.5167_54.8333" "1993_1_15.0167_54.4333" "1993_1_16.6333_54.85"  
#>  [37] "1993_1_15.7667_54.3667" "1993_1_16.0167_54.4333" "1993_1_17.5_55"        
#>  [40] "1993_1_15.3333_55.9333" "1993_1_15.7167_54.4333" "1993_1_16.2167_54.9167"
#>  [43] "1993_1_16.3_55.8333"    "1993_1_15.95_54.7833"   "1993_1_17.6167_56.0833"
#>  [46] "1993_1_15.6_55.9167"    "1993_1_15.5833_54.6167" "1993_1_16.2333_55.7833"
#>  [49] "1993_1_17.75_56.1333"   "1993_1_15.1167_55.6833" "1993_1_16.4333_55.45"  
#>  [52] "1993_1_14.8667_55.6167" "1993_1_15.6_54.7667"    "1993_1_16.35_55.3667"  
#>  [55] "1993_1_15.8833_55.6"    "1993_1_15.7_55.6"       "1993_1_17.35_55.2667"  
#>  [58] "1993_1_16.2_55.3"       "1993_1_15.1667_55.4333" "1993_1_17.55_55.25"    
#>  [61] "1993_1_16.05_55.1333"   "1993_1_17.0855_55.8667" "1993_1_17.1323_55.9248"
#>  [64] "1993_1_17.7367_55.8015" "1993_1_17.7188_56.0352" "1993_1_16.9745_56.445" 
#>  [67] "1993_1_16.8225_56.5178" "1993_1_17.94_57.004"    "1993_1_17.9145_57.066" 
#>  [70] "1993_1_16.9133_57.3685" "1993_1_17.0157_57.5172" "1993_1_17.5322_57.4502"
#>  [73] "1993_1_18.123_57.821"   "1993_1_18.3128_57.7555" "1993_1_19.4038_57.8736"
#>  [76] "1993_1_19.5368_57.8885" "1993_1_19.5493_57.8677" "1993_1_19.0568_57.5685"
#>  [79] "1993_1_19.1942_57.3825" "1993_1_18.8761_57.1808" "1993_1_18.8363_57.085" 
#>  [82] "1993_1_18.9005_57.089"  "1993_1_18.4828_56.2562" "1993_1_18.3732_55.5878"
#>  [85] "1993_1_16.9951_55.2882" "1993_1_16.0507_55.3282" "1993_1_15.3205_55.412" 
#>  [88] "1993_1_16.1217_55.795"  "1993_1_16.085_55.8733"  "1993_1_15.46_55.9933"  
#>  [91] "1993_1_15.4393_55.8218" "1993_1_14.7172_55.5687" "1993_1_14.5143_55.6712"
#>  [94] "1993_1_16.611_55.6187"  "1993_1_16.8935_55.785"  "1993_1_14.7333_55.455" 
#>  [97] "1993_1_14.5483_55.465"  "1993_1_13.9913_54.9983" "1993_1_13.6493_55.213" 
#> [100] "1993_1_13.32_54.935"
# First 100 ids in the oxygen data, for comparison with the ids in dat
head(big_dat_oxy$id_oxy, 100)
#>   [1] "1993_1_13_54.6833"      "1993_1_14.2667_54.5167" "1993_1_14.15_54.5167"  
#>   [4] "1993_1_13.9833_54.4833" "1993_1_13.0833_54.7333" "1993_1_13.1833_54.7"   
#>   [7] "1993_1_13.8_54.55"      "1993_1_14.1167_54.7"    "1993_1_14.0833_54.5833"
#>  [10] "1993_1_13.8667_54.5833" "1993_1_14.15_54.6333"   "1993_1_13.3667_54.7333"
#>  [13] "1993_1_13.5167_54.7333" "1993_1_13.6333_54.6833" "1993_1_13.7333_54.6667"
#>  [16] "1993_1_13.85_54.7"      "1993_1_13.6667_54.7667" "1993_1_14.0167_54.85"  
#>  [19] "1993_1_13.8833_54.9667" "1993_1_13.6167_54.9167" "1993_1_13.2167_54.8667"
#>  [22] "1993_1_13.1_54.9333"    "1993_1_13.3167_54.9833" "1993_1_13.4333_55.0333"
#>  [25] "1993_1_13.75_55.0667"   "1993_1_14.1667_55.1333" "1993_1_14.2667_55.1667"
#>  [28] "1993_1_14.2833_55.0667" "1993_1_14.2333_55"      "1993_1_13.6167_55.05"  
#>  [31] "1993_1_13.8167_55.0833" "1993_1_16.6_54.8667"    "1993_1_15.1167_54.3"   
#>  [34] "1993_1_16.5167_54.8333" "1993_1_15.0167_54.4333" "1993_1_16.6333_54.85"  
#>  [37] "1993_1_15.7667_54.3667" "1993_1_16.0167_54.4333" "1993_1_17.5_55"        
#>  [40] "1993_1_15.3333_55.9333" "1993_1_15.7167_54.4333" "1993_1_16.2167_54.9167"
#>  [43] "1993_1_16.3_55.8333"    "1993_1_15.95_54.7833"   "1993_1_17.6167_56.0833"
#>  [46] "1993_1_15.6_55.9167"    "1993_1_15.5833_54.6167" "1993_1_16.2333_55.7833"
#>  [49] "1993_1_17.75_56.1333"   "1993_1_15.1167_55.6833" "1993_1_16.4333_55.45"  
#>  [52] "1993_1_14.8667_55.6167" "1993_1_15.6_54.7667"    "1993_1_16.35_55.3667"  
#>  [55] "1993_1_15.8833_55.6"    "1993_1_15.7_55.6"       "1993_1_17.35_55.2667"  
#>  [58] "1993_1_16.2_55.3"       "1993_1_15.1667_55.4333" "1993_1_17.55_55.25"    
#>  [61] "1993_1_16.05_55.1333"   "1993_1_17.0855_55.8667" "1993_1_17.1323_55.9248"
#>  [64] "1993_1_17.7367_55.8015" "1993_1_17.7188_56.0352" "1993_1_16.9745_56.445" 
#>  [67] "1993_1_16.8225_56.5178" "1993_1_17.94_57.004"    "1993_1_17.9145_57.066" 
#>  [70] "1993_1_16.9133_57.3685" "1993_1_17.0157_57.5172" "1993_1_17.5322_57.4502"
#>  [73] "1993_1_18.123_57.821"   "1993_1_18.3128_57.7555" "1993_1_19.4038_57.8736"
#>  [76] "1993_1_19.5368_57.8885" "1993_1_19.5493_57.8677" "1993_1_19.0568_57.5685"
#>  [79] "1993_1_19.1942_57.3825" "1993_1_18.8761_57.1808" "1993_1_18.8363_57.085" 
#>  [82] "1993_1_18.9005_57.089"  "1993_1_18.4828_56.2562" "1993_1_18.3732_55.5878"
#>  [85] "1993_1_16.9951_55.2882" "1993_1_16.0507_55.3282" "1993_1_15.3205_55.412" 
#>  [88] "1993_1_16.1217_55.795"  "1993_1_16.085_55.8733"  "1993_1_15.46_55.9933"  
#>  [91] "1993_1_15.4393_55.8218" "1993_1_14.7172_55.5687" "1993_1_14.5143_55.6712"
#>  [94] "1993_1_16.611_55.6187"  "1993_1_16.8935_55.785"  "1993_1_14.7333_55.455" 
#>  [97] "1993_1_14.5483_55.465"  "1993_1_13.9913_54.9983" "1993_1_13.6493_55.213" 
#> [100] "1993_1_13.32_54.935"
# Last 100 ids in the cpue data
tail(dat$id_oxy, 100)
#>   [1] "2020_4_20.985_57.0167"  "2020_4_21.0083_57.0233" "2020_4_20.72_56.6267"  
#>   [4] "2020_4_20.7083_56.6233" "2020_4_20.68_56.625"    "2020_4_20.5983_56.64"  
#>   [7] "2020_4_20.2583_56.54"   "2020_4_19.8717_56.1317" "2020_4_20.5433_57.0467"
#>  [10] "2020_4_19.9117_56.17"   "2020_4_20.08_56.3833"   "2020_4_19.6617_56.2583"
#>  [13] "2020_4_20.59_57.1717"   "2020_4_20.6733_57.205"  "2020_4_21.1217_57.5067"
#>  [16] "2020_4_21.4283_57.4983" "2020_4_21.3433_57.41"   "2020_4_21.1033_57.255" 
#>  [19] "2020_4_16.6483_54.8733" "2020_4_15.5817_54.3833" "2020_4_15.7833_54.38"  
#>  [22] "2020_4_16.005_54.4233"  "2020_4_15.9917_54.405"  "2020_4_16.4183_54.6567"
#>  [25] "2020_4_16.6367_54.6933" "2020_4_16.8417_54.7517" "2020_4_16.8833_54.7417"
#>  [28] "2020_4_17.3667_54.9517" "2020_4_17.48_54.85"     "2020_4_17.3067_55.3167"
#>  [31] "2020_4_17.9567_55.735"  "2020_4_17.375_55.2567"  "2020_4_17.3967_54.9967"
#>  [34] "2020_4_17.31_55.235"    "2020_4_19.1983_54.3817" "2020_4_18.1983_54.895" 
#>  [37] "2020_4_18.5733_54.8567" "2020_4_18.015_55.69"    "2020_4_18.665_56.4567" 
#>  [40] "2020_4_18.6883_56.4733" "2020_4_18.96_55.1833"   "2020_4_18.91_55.4417"  
#>  [43] "2020_4_18.9967_55.5117" "2020_4_18.6083_56.4017" "2020_4_19.0183_54.4033"
#>  [46] "2020_4_18.76_54.7267"   "2020_4_19.2833_54.41"   "2020_4_18.535_55.485"  
#>  [49] "2020_4_18.6733_54.795"  "2020_4_18.3567_56.1767" "2020_4_18.2233_55.5283"
#>  [52] "2020_4_18.5167_54.9933" "2020_4_19.2283_54.395"  "2020_4_19.0567_54.4167"
#>  [55] "2020_4_19.3117_54.45"   "2020_4_19.0133_54.3967" "2020_4_19.0367_54.4167"
#>  [58] "2020_4_18.5033_54.8333" "2020_4_18.6067_54.915"  "2020_4_18.1817_54.8983"
#>  [61] "2020_4_18.93_54.525"    "2020_4_18.9267_54.51"   "2020_4_19.075_54.4383" 
#>  [64] "2020_4_19.04_54.4217"   "2020_4_19.325_54.575"   "2020_4_19.1717_54.6"   
#>  [67] "2020_4_19.1117_54.4333" "2020_4_16.9165_57.3647" "2020_4_17.0241_57.4144"
#>  [70] "2020_4_17.0912_57.4622" "2020_4_17.0977_57.6175" "2020_4_18.1196_57.7804"
#>  [73] "2020_4_17.5616_57.4828" "2020_4_19.1682_57.6135" "2020_4_19.3769_57.6885"
#>  [76] "2020_4_19.4807_57.7685" "2020_4_19.4891_58.0354" "2020_4_19.439_58.0746" 
#>  [79] "2020_4_19.5044_57.876"  "2020_4_19.2267_57.3361" "2020_4_18.9076_57.089" 
#>  [82] "2020_4_18.8636_57.1753" "2020_4_19.0668_57.2496" "2020_4_19.0591_57.3072"
#>  [85] "2020_4_19.1085_57.319"  "2020_4_17.9509_56.9956" "2020_4_17.9167_57.0532"
#>  [88] "2020_4_17.4146_57.2805" "2020_4_17.184_56.9584"  "2020_4_16.9939_56.7256"
#>  [91] "2020_4_16.85_56.5655"   "2020_4_13.6016_55.2125" "2020_4_13.925_55.286"  
#>  [94] "2020_4_13.9518_55.26"   "2020_4_14.4799_55.4589" "2020_4_14.5095_55.6877"
#>  [97] "2020_4_14.3632_55.6986" "2020_4_14.3795_55.7037" "2020_4_15.5667_55.883" 
#> [100] "2020_4_15.5703_55.8469"
# Last 100 ids in the oxygen data, for comparison with the ids in dat
tail(big_dat_oxy$id_oxy, 100)
#>   [1] "2020_4_20.985_57.0167"  "2020_4_21.0083_57.0233" "2020_4_20.72_56.6267"  
#>   [4] "2020_4_20.7083_56.6233" "2020_4_20.68_56.625"    "2020_4_20.5983_56.64"  
#>   [7] "2020_4_20.2583_56.54"   "2020_4_19.8717_56.1317" "2020_4_20.5433_57.0467"
#>  [10] "2020_4_19.9117_56.17"   "2020_4_20.08_56.3833"   "2020_4_19.6617_56.2583"
#>  [13] "2020_4_20.59_57.1717"   "2020_4_20.6733_57.205"  "2020_4_21.1217_57.5067"
#>  [16] "2020_4_21.4283_57.4983" "2020_4_21.3433_57.41"   "2020_4_21.1033_57.255" 
#>  [19] "2020_4_16.6483_54.8733" "2020_4_15.5817_54.3833" "2020_4_15.7833_54.38"  
#>  [22] "2020_4_16.005_54.4233"  "2020_4_15.9917_54.405"  "2020_4_16.4183_54.6567"
#>  [25] "2020_4_16.6367_54.6933" "2020_4_16.8417_54.7517" "2020_4_16.8833_54.7417"
#>  [28] "2020_4_17.3667_54.9517" "2020_4_17.48_54.85"     "2020_4_17.3067_55.3167"
#>  [31] "2020_4_17.9567_55.735"  "2020_4_17.375_55.2567"  "2020_4_17.3967_54.9967"
#>  [34] "2020_4_17.31_55.235"    "2020_4_19.1983_54.3817" "2020_4_18.1983_54.895" 
#>  [37] "2020_4_18.5733_54.8567" "2020_4_18.015_55.69"    "2020_4_18.665_56.4567" 
#>  [40] "2020_4_18.6883_56.4733" "2020_4_18.96_55.1833"   "2020_4_18.91_55.4417"  
#>  [43] "2020_4_18.9967_55.5117" "2020_4_18.6083_56.4017" "2020_4_19.0183_54.4033"
#>  [46] "2020_4_18.76_54.7267"   "2020_4_19.2833_54.41"   "2020_4_18.535_55.485"  
#>  [49] "2020_4_18.6733_54.795"  "2020_4_18.3567_56.1767" "2020_4_18.2233_55.5283"
#>  [52] "2020_4_18.5167_54.9933" "2020_4_19.2283_54.395"  "2020_4_19.0567_54.4167"
#>  [55] "2020_4_19.3117_54.45"   "2020_4_19.0133_54.3967" "2020_4_19.0367_54.4167"
#>  [58] "2020_4_18.5033_54.8333" "2020_4_18.6067_54.915"  "2020_4_18.1817_54.8983"
#>  [61] "2020_4_18.93_54.525"    "2020_4_18.9267_54.51"   "2020_4_19.075_54.4383" 
#>  [64] "2020_4_19.04_54.4217"   "2020_4_19.325_54.575"   "2020_4_19.1717_54.6"   
#>  [67] "2020_4_19.1117_54.4333" "2020_4_16.9165_57.3647" "2020_4_17.0241_57.4144"
#>  [70] "2020_4_17.0912_57.4622" "2020_4_17.0977_57.6175" "2020_4_18.1196_57.7804"
#>  [73] "2020_4_17.5616_57.4828" "2020_4_19.1682_57.6135" "2020_4_19.3769_57.6885"
#>  [76] "2020_4_19.4807_57.7685" "2020_4_19.4891_58.0354" "2020_4_19.439_58.0746" 
#>  [79] "2020_4_19.5044_57.876"  "2020_4_19.2267_57.3361" "2020_4_18.9076_57.089" 
#>  [82] "2020_4_18.8636_57.1753" "2020_4_19.0668_57.2496" "2020_4_19.0591_57.3072"
#>  [85] "2020_4_19.1085_57.319"  "2020_4_17.9509_56.9956" "2020_4_17.9167_57.0532"
#>  [88] "2020_4_17.4146_57.2805" "2020_4_17.184_56.9584"  "2020_4_16.9939_56.7256"
#>  [91] "2020_4_16.85_56.5655"   "2020_4_13.6016_55.2125" "2020_4_13.925_55.286"  
#>  [94] "2020_4_13.9518_55.26"   "2020_4_14.4799_55.4589" "2020_4_14.5095_55.6877"
#>  [97] "2020_4_14.3632_55.6986" "2020_4_14.3795_55.7037" "2020_4_15.5667_55.883" 
#> [100] "2020_4_15.5703_55.8469"
#dat %>% group_by(year) %>% summarise(n = n()) %>% as.data.frame()

# Ids in the cpue data (dat) that have no match in the oxygen data
ids <- dat$id_oxy[is.na(match(dat$id_oxy, big_dat_oxy$id_oxy))]

# Show the cpue rows that carry those unmatched ids
filter(dat, id_oxy %in% ids)
#> filter: removed 9,261 rows (99%), 112 rows remaining
#> # A tibble: 112 × 17
#>     year   lat   lon quarter Country Month haul.id IDx   ices_…¹ sub_div lengt…²
#>    <int> <dbl> <dbl>   <int> <chr>   <int> <chr>   <chr> <chr>   <chr>     <dbl>
#>  1  1993  57.4  16.9       3 SWE         8 1993:3… 1993… 43G6    27            0
#>  2  1993  57.5  17.1       3 SWE         8 1993:3… 1993… 43G7    27            0
#>  3  1993  57.5  17.6       3 SWE         8 1993:3… 1993… 43G7    27            0
#>  4  1993  57.1  17.9       3 SWE         8 1993:3… 1993… 43G7    27            0
#>  5  1993  57.3  17.9       3 SWE         8 1993:3… 1993… 43G7    27            0
#>  6  1993  57.8  18.1       3 SWE         8 1993:3… 1993… 44G8    27            0
#>  7  1993  57.8  18.3       3 SWE         8 1993:3… 1993… 44G8    27            0
#>  8  1993  57.8  19.5       3 SWE         8 1993:3… 1993… 44G9    28            0
#>  9  1993  57.9  19.5       3 SWE         8 1993:3… 1993… 44G9    28            0
#> 10  1993  57.9  19.5       3 SWE         8 1993:3… 1993… 44G9    28            0
#> # … with 102 more rows, 6 more variables: id_haul_stomach <chr>, species <chr>,
#> #   haul.id.size <chr>, substrate <chr>, depth <dbl>, id_oxy <chr>, and
#> #   abbreviated variable names ¹​ices_rect, ²​length_cm

# Keep only the join key and the oxygen value for the merge
big_dat_sub_oxy <- dplyr::select(big_dat_oxy, id_oxy, oxy)

# Count the rows per id and list the most duplicated ids first
# (the merge needs a single oxy value per id)
big_dat_sub_oxy %>%
  group_by(id_oxy) %>%
  mutate(n = n()) %>%
  arrange(desc(n))
#> group_by: one grouping variable (id_oxy)
#> mutate (grouped): new variable 'n' (integer) with 4 unique values and 0% NA
#> # A tibble: 9,261 × 3
#> # Groups:   id_oxy [9,198]
#>    id_oxy                   oxy     n
#>    <chr>                  <dbl> <int>
#>  1 1999_1_15.3667_54.5667  7.05     4
#>  2 1999_1_15.3667_54.5667  7.05     4
#>  3 1999_1_15.3667_54.5667  7.05     4
#>  4 1999_1_15.3667_54.5667  7.05     4
#>  5 1994_1_18.7833_54.7     8.62     3
#>  6 1994_1_18.7833_54.7     8.62     3
#>  7 1994_1_18.7833_54.7     8.62     3
#>  8 2003_1_20.3333_56.6333  4.30     3
#>  9 2003_1_20.3333_56.6333  4.30     3
#> 10 2003_1_20.3333_56.6333  4.30     3
#> # … with 9,251 more rows
# Drop the repeated ids so that every id_oxy occurs once
big_dat_sub_oxy2 <- distinct(big_dat_sub_oxy, id_oxy, .keep_all = TRUE)
#> distinct: removed 63 rows (1%), 9,198 rows remaining

Temperature

# Open a connection to the netCDF file with the monthly mean fields
ncin <- nc_open(
  filename = "data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1664183191233.nc"
)

# Print the file header: variables, dimensions and global attributes
print(ncin)
#> File data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1664183191233.nc (NC_FORMAT_CLASSIC):
#> 
#>      1 variables (excluding dimension variables):
#>         float bottomT[longitude,latitude,time]   
#>             standard_name: sea_water_potential_temperature_at_sea_floor
#>             units: degrees_C
#>             long_name: Sea floor potential temperature
#>             missing_value: NaN
#>             _FillValue: NaN
#>             _ChunkSizes: 1
#>              _ChunkSizes: 523
#>              _ChunkSizes: 383
#> 
#>      3 dimensions:
#>         time  Size:336
#>             axis: T
#>             long_name: Validity time
#>             standard_name: time
#>             units: days since 1950-01-01 00:00:00
#>             calendar: gregorian
#>             _ChunkSizes: 512
#>             _CoordinateAxisType: Time
#>             valid_min: 15721.5
#>             valid_max: 25917.5
#>         latitude  Size:523
#>             axis: Y
#>             standard_name: latitude
#>             long_name: latitude
#>             units: degrees_north
#>             _CoordinateAxisType: Lat
#>             valid_min: 48.49169921875
#>             valid_max: 65.8914184570312
#>         longitude  Size:383
#>             standard_name: longitude
#>             long_name: longitude
#>             units: degrees_east
#>             axis: X
#>             _CoordinateAxisType: Lon
#>             valid_min: 9.01375484466553
#>             valid_max: 30.2357654571533
#> 
#>     24 global attributes:
#>         references: http://www.smhi.se
#>         institution: Swedish Meterological and Hydrological Institute
#>         history: See source and creation_date attributees
#>         Conventions: CF-1.5
#>         contact: servicedesk_cmems@mercator-ocean.eu
#>         comment: Provided by SMHI as a Copernicus Marine Environment Monitoring Service production unit
#>         bullentin_type: reanalysis
#>         cmems_product_id: BALTICSEA_REANALYSIS_PHY_003_011
#>         title: CMEMS V4 Reanalysis: NEMO model 3D fields (monthly means)
#>         FROM_ORIGINAL_FILE__easternmost_longitude: 30.2357654571533
#>         FROM_ORIGINAL_FILE__northernmost_latitude: 65.8914184570312
#>         FROM_ORIGINAL_FILE__westernmost_longitude: 9.01375484466553
#>         FROM_ORIGINAL_FILE__southernmost_latitude: 48.49169921875
#>         shallowest_depth: 1.50136542320251
#>         deepest_depth: 711.059204101562
#>         source: SMHI reanalysis run NORDIC-NS2_1d_20201201_20201201
#>         file_quality_index: 1
#>         creation_date: 2021-11-09 UTC
#>         bullentin_date: 20201201
#>         start_date: 2020-12-01 UTC
#>         stop_date: 2020-12-01 UTC
#>         start_time: 00:00 UTC
#>         stop_time: 00:00 UTC
#>         _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention

# Longitude coordinates of the grid and how many there are
lon <- ncvar_get(ncin, varid = "longitude")
nlon <- dim(lon)
head(lon)
#> [1] 9.013755 9.069310 9.124865 9.180420 9.235975 9.291530

# Latitude coordinates of the grid and how many there are
lat <- ncvar_get(ncin, varid = "latitude")
nlat <- dim(lat)
head(lat)
#> [1] 48.49170 48.52503 48.55836 48.59170 48.62503 48.65836

# Time axis (one value per monthly field)
time <- ncvar_get(ncin, varid = "time")
time
#>   [1] 15721.5 15751.0 15780.5 15811.0 15841.5 15872.0 15902.5 15933.5 15964.0
#>  [10] 15994.5 16025.0 16055.5 16086.5 16116.0 16145.5 16176.0 16206.5 16237.0
#>  [19] 16267.5 16298.5 16329.0 16359.5 16390.0 16420.5 16451.5 16481.0 16510.5
#>  [28] 16541.0 16571.5 16602.0 16632.5 16663.5 16694.0 16724.5 16755.0 16785.5
#>  [37] 16816.5 16846.5 16876.5 16907.0 16937.5 16968.0 16998.5 17029.5 17060.0
#>  [46] 17090.5 17121.0 17151.5 17182.5 17212.0 17241.5 17272.0 17302.5 17333.0
#>  [55] 17363.5 17394.5 17425.0 17455.5 17486.0 17516.5 17547.5 17577.0 17606.5
#>  [64] 17637.0 17667.5 17698.0 17728.5 17759.5 17790.0 17820.5 17851.0 17881.5
#>  [73] 17912.5 17942.0 17971.5 18002.0 18032.5 18063.0 18093.5 18124.5 18155.0
#>  [82] 18185.5 18216.0 18246.5 18277.5 18307.5 18337.5 18368.0 18398.5 18429.0
#>  [91] 18459.5 18490.5 18521.0 18551.5 18582.0 18612.5 18643.5 18673.0 18702.5
#> [100] 18733.0 18763.5 18794.0 18824.5 18855.5 18886.0 18916.5 18947.0 18977.5
#> [109] 19008.5 19038.0 19067.5 19098.0 19128.5 19159.0 19189.5 19220.5 19251.0
#> [118] 19281.5 19312.0 19342.5 19373.5 19403.0 19432.5 19463.0 19493.5 19524.0
#> [127] 19554.5 19585.5 19616.0 19646.5 19677.0 19707.5 19738.5 19768.5 19798.5
#> [136] 19829.0 19859.5 19890.0 19920.5 19951.5 19982.0 20012.5 20043.0 20073.5
#> [145] 20104.5 20134.0 20163.5 20194.0 20224.5 20255.0 20285.5 20316.5 20347.0
#> [154] 20377.5 20408.0 20438.5 20469.5 20499.0 20528.5 20559.0 20589.5 20620.0
#> [163] 20650.5 20681.5 20712.0 20742.5 20773.0 20803.5 20834.5 20864.0 20893.5
#> [172] 20924.0 20954.5 20985.0 21015.5 21046.5 21077.0 21107.5 21138.0 21168.5
#> [181] 21199.5 21229.5 21259.5 21290.0 21320.5 21351.0 21381.5 21412.5 21443.0
#> [190] 21473.5 21504.0 21534.5 21565.5 21595.0 21624.5 21655.0 21685.5 21716.0
#> [199] 21746.5 21777.5 21808.0 21838.5 21869.0 21899.5 21930.5 21960.0 21989.5
#> [208] 22020.0 22050.5 22081.0 22111.5 22142.5 22173.0 22203.5 22234.0 22264.5
#> [217] 22295.5 22325.0 22354.5 22385.0 22415.5 22446.0 22476.5 22507.5 22538.0
#> [226] 22568.5 22599.0 22629.5 22660.5 22690.5 22720.5 22751.0 22781.5 22812.0
#> [235] 22842.5 22873.5 22904.0 22934.5 22965.0 22995.5 23026.5 23056.0 23085.5
#> [244] 23116.0 23146.5 23177.0 23207.5 23238.5 23269.0 23299.5 23330.0 23360.5
#> [253] 23391.5 23421.0 23450.5 23481.0 23511.5 23542.0 23572.5 23603.5 23634.0
#> [262] 23664.5 23695.0 23725.5 23756.5 23786.0 23815.5 23846.0 23876.5 23907.0
#> [271] 23937.5 23968.5 23999.0 24029.5 24060.0 24090.5 24121.5 24151.5 24181.5
#> [280] 24212.0 24242.5 24273.0 24303.5 24334.5 24365.0 24395.5 24426.0 24456.5
#> [289] 24487.5 24517.0 24546.5 24577.0 24607.5 24638.0 24668.5 24699.5 24730.0
#> [298] 24760.5 24791.0 24821.5 24852.5 24882.0 24911.5 24942.0 24972.5 25003.0
#> [307] 25033.5 25064.5 25095.0 25125.5 25156.0 25186.5 25217.5 25247.0 25276.5
#> [316] 25307.0 25337.5 25368.0 25398.5 25429.5 25460.0 25490.5 25521.0 25551.5
#> [325] 25582.5 25612.5 25642.5 25673.0 25703.5 25734.0 25764.5 25795.5 25826.0
#> [334] 25856.5 25887.0 25917.5

# Units attribute of the time axis and the number of time steps
tunits <- ncatt_get(ncin, varid = "time", attname = "units")
nt <- dim(time)
nt
#> [1] 336
tunits
#> $hasatt
#> [1] TRUE
#> 
#> $value
#> [1] "days since 1950-01-01 00:00:00"

# Name of the temperature variable in the file
dname <- "bottomT"

# Read the full [longitude, latitude, time] array and the attributes of the variable
temp_array <- ncvar_get(ncin, varid = dname)
dlname <- ncatt_get(ncin, varid = dname, attname = "long_name")
dunits <- ncatt_get(ncin, varid = dname, attname = "units")
fillvalue <- ncatt_get(ncin, varid = dname, attname = "_FillValue")
dim(temp_array)
#> [1] 383 523 336

# Get global attributes (varid = 0 addresses the attributes of the file itself)
title <- ncatt_get(ncin, 0, "title")
institution <- ncatt_get(ncin, 0, "institution")
datasource <- ncatt_get(ncin, 0, "source")
references <- ncatt_get(ncin, 0, "references")
history <- ncatt_get(ncin, 0, "history")
Conventions <- ncatt_get(ncin, 0, "Conventions")

# Coordinates, time axis, temperature array and attributes are now in memory, so
# release the file handle instead of leaving the connection open.
# NOTE(review): assumes no later step reads from ncin again - confirm
nc_close(ncin)

# Convert time. The units string reads "days since <yyyy-mm-dd> <hh:mm:ss>", so the
# third space-separated field is the origin date, which is split into its parts
tustr <- strsplit(tunits$value, " ")
tdstr <- strsplit(tustr[[1]][3], "-")
tyear <- as.integer(tdstr[[1]][1])
tmonth <- as.integer(tdstr[[1]][2])
tday <- as.integer(tdstr[[1]][3])

# Here I deviate from the guide a little bit: keep the time steps as chron dates,
# counted from the origin given in the file
dates <- chron(time, origin = c(tmonth, tday, tyear))

# Month and calendar year of every time step.
# Both are derived from the numeric time axis (days since the origin parsed above)
# rather than by cutting characters out of the printed chron dates: that depended
# on chron's output format and on a two-digit-year pivot (> 90) that turned 1990
# into 2090.
time_as_date <- as.Date(
  as.vector(time),
  origin = sprintf("%04d-%02d-%02d", tyear, tmonth, tday)
)
months <- as.numeric(format(time_as_date, "%m"))
years <- as.numeric(format(time_as_date, "%Y"))

# Replace netCDF fill values with NA's.
# The fill value of this variable is NaN, and an equality test cannot find NaN
# (x == NaN is NA for every x), so NaN fills are located with is.nan(). Any other
# fill value is matched by value; which() drops the NA comparisons produced by
# cells that are already missing. Nothing is replaced if the variable has no
# _FillValue attribute.
if (isTRUE(fillvalue$hasatt)) {
  if (is.na(fillvalue$value)) {
    temp_array[is.nan(temp_array)] <- NA
  } else {
    temp_array[which(temp_array == fillvalue$value)] <- NA
  }
}

# Only the months that belong to the quarters we use (Q1 and Q4) are needed.
# Below, the time steps of those months are picked out and averaged per year.
# Start by looking at the month of every time step
print(months)
#>   [1]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1
#>  [26]  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2
#>  [51]  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3
#>  [76]  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4
#> [101]  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5
#> [126]  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6
#> [151]  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7
#> [176]  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8
#> [201]  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9
#> [226] 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10
#> [251] 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11
#> [276] 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12
#> [301]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1
#> [326]  2  3  4  5  6  7  8  9 10 11 12

# Positions on the time axis that fall in quarter 1 (Jan-Mar) and quarter 4 (Oct-Dec)
index_keep_q1 <- which(months <= 3)
index_keep_q4 <- which(months >= 10)

# Temperature fields of those time steps
temp_q1 <- temp_array[, , index_keep_q1]
temp_q4 <- temp_array[, , index_keep_q4]

# Matching months ...
months_keep_q1 <- months[index_keep_q1]
months_keep_q4 <- months[index_keep_q4]

# ... and years
years_keep_q1 <- years[index_keep_q1]
years_keep_q4 <- years[index_keep_q4]

# temp_q1 and temp_q4 now hold only the months of their own quarter.
# The next step is to average the months of a quarter within each year.
# Position of the first month of every year's block of three: 1, 4, 7, ...
loop_seq_q1 <- seq(from = 1, to = dim(temp_q1)[3], by = 3)
loop_seq_q4 <- seq(from = 1, to = dim(temp_q4)[3], by = 3)

# Empty lists that will receive one averaged field per year
dlist_q1 <- vector("list", length = 0)
dlist_q4 <- vector("list", length = 0)

# Placeholders for the monthly fields of quarter 1 and their average
temp_1 <- NULL
temp_2 <- NULL
temp_3 <- NULL
temp_ave_q1 <- NULL

# Placeholders for the monthly fields of quarter 4 and their average
temp_10 <- NULL
temp_11 <- NULL
temp_12 <- NULL
temp_ave_q4 <- NULL

# Now average by quarter. loop_seq_q1 and loop_seq_q4 hold the position of the
# first month of every year (1, 4, 7, ...), so i, i + 1 and i + 2 are the three
# months of one quarter in one year.
# NOTE(review): this relies on the time axis holding complete quarters in
# chronological order - confirm if the input file changes.
#
# The list position is the running count of the year. It used to be computed as
# (i / 3) - (1 / 3) + 1, a floating point value that `[[<-` truncates and that can
# therefore end up in the previous slot when it comes out just below a whole
# number. Each quarter is also iterated with its own sequence instead of reusing
# the Q1 sequence for Q4.

# Fail early, and clearly, if a quarter does not consist of whole years
stopifnot(
  dim(temp_q1)[3] %% 3 == 0,
  dim(temp_q4)[3] %% 3 == 0
)

for (list_pos_q1 in seq_along(loop_seq_q1)) {

  i <- loop_seq_q1[list_pos_q1]

  temp_1 <- temp_q1[, , i]
  temp_2 <- temp_q1[, , i + 1]
  temp_3 <- temp_q1[, , i + 2]

  # Cell-wise mean; a cell that is NA in any month stays NA
  temp_ave_q1 <- (temp_1 + temp_2 + temp_3) / 3

  dlist_q1[[list_pos_q1]] <- temp_ave_q1

}

for (list_pos_q4 in seq_along(loop_seq_q4)) {

  i <- loop_seq_q4[list_pos_q4]

  temp_10 <- temp_q4[, , i]
  temp_11 <- temp_q4[, , i + 1]
  temp_12 <- temp_q4[, , i + 2]

  # Cell-wise mean; a cell that is NA in any month stays NA
  temp_ave_q4 <- (temp_10 + temp_11 + temp_12) / 3

  dlist_q4[[list_pos_q4]] <- temp_ave_q4

}

# Label every averaged field with its year (the names are stored as character)
dlist_q1 <- setNames(dlist_q1, unique(years_keep_q1))
dlist_q4 <- setNames(dlist_q4, unique(years_keep_q4))

# Next, the temperature at every haul position is extracted year by year.
# The cpue data is called dat so far in this script

# Per quarter, keep the hauls from years that have a temperature field
d_sub_temp_q1 <- dat %>%
  filter(quarter == 1) %>%
  filter(year %in% names(dlist_q1)) %>%
  droplevels()
#> filter: removed 3,778 rows (40%), 5,595 rows remaining
#> filter: no rows removed
d_sub_temp_q4 <- dat %>%
  filter(quarter == 4) %>%
  filter(year %in% names(dlist_q4)) %>%
  droplevels()
#> filter: removed 5,707 rows (61%), 3,666 rows remaining
#> filter: no rows removed

# Lists that collect, per year, the haul positions with their extracted temperature
temp_data_list_q1 <- vector("list", length = 0)
temp_data_list_q4 <- vector("list", length = 0)

# Lists that collect, per year, the temperature raster as a data frame
raster_list_q1 <- vector("list", length = 0)
raster_list_q4 <- vector("list", length = 0)

# Year as a factor; the loop below iterates over its values
d_sub_temp_q1[["year_f"]] <- as.factor(d_sub_temp_q1[["year"]])
d_sub_temp_q4[["year_f"]] <- as.factor(d_sub_temp_q4[["year"]])

# Loop through each year and extract raster values for the cpue data points.
# For every year this builds a Q1 and a Q4 temperature raster, reads the value
# under each haul position, stores hauls and raster in the lists created above
# and saves a map per quarter as a visual check.
# NOTE(review): the years come from the Q1 data and are reused for Q4, so this
# assumes both quarters cover the same years - confirm

# Plot limits; the same for every year, so set once
ymin <- 54
ymax <- 58
xmin <- 12
xmax <- 22

# Coordinate reference system of the temperature grid (lon/lat on WGS84).
# The string used before ended in "+no_defs+ towgs84=0,0,0", which is malformed.
temp_crs <- CRS("+proj=longlat +datum=WGS84 +no_defs")

for (i in unique(d_sub_temp_q1$year_f)) {

  # Subset a year. Looping over a factor yields its labels, so i is the year as a
  # character string and selects the list elements by name
  temp_slice_q1 <- dlist_q1[[i]]
  temp_slice_q4 <- dlist_q4[[i]]

  # Create raster for that year (i). The fields are stored as [longitude, latitude],
  # so they are transposed to get latitude along the rows
  # NOTE(review): min/max of lon and lat are used as the outer edges of the raster;
  # if they are cell centres the grid is offset by half a cell - confirm
  r_q1 <- raster(t(temp_slice_q1), xmn = min(lon), xmx = max(lon),
                 ymn = min(lat), ymx = max(lat), crs = temp_crs)
  r_q4 <- raster(t(temp_slice_q4), xmn = min(lon), xmx = max(lon),
                 ymn = min(lat), ymx = max(lat), crs = temp_crs)

  # Flip, because the first row of the transposed field is the southernmost
  # latitude while the first row of a raster is its northern edge
  r_q1 <- flip(r_q1, direction = "y")
  r_q4 <- flip(r_q4, direction = "y")

  plot(r_q1, main = paste(i, "Q1"))
  plot(r_q4, main = paste(i, "Q4"))

  # Filter the same year (i) in the cpue data and select only coordinates
  d_slice_q1 <- d_sub_temp_q1 %>% filter(year_f == i) %>% dplyr::select(lon, lat)
  d_slice_q4 <- d_sub_temp_q4 %>% filter(year_f == i) %>% dplyr::select(lon, lat)

  # Make into a SpatialPoints object
  data_sp_q1 <- SpatialPoints(d_slice_q1)
  data_sp_q4 <- SpatialPoints(d_slice_q4)

  # Extract raster value (temperature)
  rasValue_q1 <- raster::extract(r_q1, data_sp_q1)
  rasValue_q4 <- raster::extract(r_q4, data_sp_q4)

  # Add in the raster value in the df holding the coordinates for the cpue data
  d_slice_q1$temp <- rasValue_q1
  d_slice_q4$temp <- rasValue_q4

  # Add in which year
  d_slice_q1$year <- i
  d_slice_q4$year <- i

  # Position of this year in the lists where all years are stored. It is looked up
  # among the years of the temperature fields, so it no longer depends on a
  # hard-coded first year or on the slice containing at least one haul
  index_q1 <- match(i, names(dlist_q1))
  index_q4 <- match(i, names(dlist_q4))

  # Add each years' data in the list
  temp_data_list_q1[[index_q1]] <- d_slice_q1
  temp_data_list_q4[[index_q4]] <- d_slice_q4

  # Save to check each year is ok! First convert the raster to points for plotting
  # (so that we can use ggplot)
  map_q1 <- rasterToPoints(r_q1)
  map_q4 <- rasterToPoints(r_q4)

  # Make the points a dataframe for ggplot
  df_rast_q1 <- data.frame(map_q1)
  df_rast_q4 <- data.frame(map_q4)

  # Rename y-variable and add year
  df_rast_q1 <- df_rast_q1 %>% rename("temp" = "layer") %>% mutate(year = i)
  df_rast_q4 <- df_rast_q4 %>% rename("temp" = "layer") %>% mutate(year = i)

  # Add each years' raster data frame in the list
  raster_list_q1[[index_q1]] <- df_rast_q1
  raster_list_q4[[index_q4]] <- df_rast_q4

  # Make appropriate column headings for the map. Only the two coordinate columns
  # are renamed: assigning three names to this four-column data frame left the
  # year column with a missing name
  colnames(df_rast_q1)[1:2] <- c("Longitude", "Latitude")
  colnames(df_rast_q4)[1:2] <- c("Longitude", "Latitude")

  # Make a map for q1: temperature raster with the haul positions on top, filled
  # with their extracted temperature on the same colour scale
  p_temp_q1 <- ggplot(data = df_rast_q1, aes(y = Latitude, x = Longitude)) +
    geom_raster(aes(fill = temp)) +
    geom_point(data = d_slice_q1, aes(x = lon, y = lat, fill = temp),
               color = "black", size = 5, shape = 21) +
    theme_bw() +
    geom_sf(data = world, inherit.aes = FALSE, size = 0.2) +
    coord_sf(xlim = c(xmin, xmax),
             ylim = c(ymin, ymax)) +
    scale_colour_gradientn(colours = rev(terrain.colors(10)),
                           limits = c(-2, 17)) +
    scale_fill_gradientn(colours = rev(terrain.colors(10)),
                         limits = c(-2, 17))

  # Pass the plot explicitly rather than relying on the last plot created
  ggsave(paste0("figures/supp/cpue_temp_rasters/", i, "q1.png"),
         plot = p_temp_q1, width = 6.5, height = 6.5, dpi = 600)

  # Make a map for q4
  p_temp_q4 <- ggplot(data = df_rast_q4, aes(y = Latitude, x = Longitude)) +
    geom_raster(aes(fill = temp)) +
    geom_point(data = d_slice_q4, aes(x = lon, y = lat, fill = temp),
               color = "black", size = 5, shape = 21) +
    theme_bw() +
    geom_sf(data = world, inherit.aes = FALSE, size = 0.2) +
    coord_sf(xlim = c(xmin, xmax),
             ylim = c(ymin, ymax)) +
    scale_colour_gradientn(colours = rev(terrain.colors(10)),
                           limits = c(-2, 17)) +
    scale_fill_gradientn(colours = rev(terrain.colors(10)),
                         limits = c(-2, 17))

  ggsave(paste0("figures/supp/cpue_temp_rasters/", i, "q4.png"),
         plot = p_temp_q4, width = 6.5, height = 6.5, dpi = 600)

}

#> filter: removed 5,494 rows (98%), 101 rows remaining
#> filter: removed 3,605 rows (98%), 61 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,420 rows (97%), 175 rows remaining
#> filter: removed 3,604 rows (98%), 62 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,441 rows (97%), 154 rows remaining
#> filter: removed 3,613 rows (99%), 53 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,412 rows (97%), 183 rows remaining
#> filter: removed 3,605 rows (98%), 61 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,381 rows (96%), 214 rows remaining
#> filter: removed 3,591 rows (98%), 75 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,367 rows (96%), 228 rows remaining
#> filter: removed 3,598 rows (98%), 68 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,405 rows (97%), 190 rows remaining
#> filter: removed 3,572 rows (97%), 94 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,485 rows (98%), 110 rows remaining
#> filter: removed 3,577 rows (98%), 89 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,387 rows (96%), 208 rows remaining
#> filter: removed 3,550 rows (97%), 116 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,450 rows (97%), 145 rows remaining
#> filter: removed 3,549 rows (97%), 117 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,434 rows (97%), 161 rows remaining
#> filter: removed 3,538 rows (97%), 128 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,388 rows (96%), 207 rows remaining
#> filter: removed 3,557 rows (97%), 109 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,377 rows (96%), 218 rows remaining
#> filter: removed 3,515 rows (96%), 151 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,407 rows (97%), 188 rows remaining
#> filter: removed 3,516 rows (96%), 150 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,360 rows (96%), 235 rows remaining
#> filter: removed 3,497 rows (95%), 169 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,373 rows (96%), 222 rows remaining
#> filter: removed 3,490 rows (95%), 176 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,336 rows (95%), 259 rows remaining
#> filter: removed 3,486 rows (95%), 180 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,355 rows (96%), 240 rows remaining
#> filter: removed 3,504 rows (96%), 162 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,352 rows (96%), 243 rows remaining
#> filter: removed 3,486 rows (95%), 180 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,371 rows (96%), 224 rows remaining
#> filter: removed 3,533 rows (96%), 133 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,327 rows (95%), 268 rows remaining
#> filter: removed 3,518 rows (96%), 148 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,387 rows (96%), 208 rows remaining
#> filter: removed 3,490 rows (95%), 176 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,358 rows (96%), 237 rows remaining
#> filter: removed 3,500 rows (95%), 166 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,363 rows (96%), 232 rows remaining
#> filter: removed 3,453 rows (94%), 213 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,350 rows (96%), 245 rows remaining
#> filter: removed 3,441 rows (94%), 225 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,339 rows (95%), 256 rows remaining
#> filter: removed 3,456 rows (94%), 210 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,477 rows (98%), 118 rows remaining
#> filter: removed 3,574 rows (97%), 92 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,469 rows (98%), 126 rows remaining
#> filter: removed 3,564 rows (97%), 102 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA


# Stack the yearly haul data into one data frame per quarter, then combine the two
# quarters and record which quarter each row belongs to
big_dat_temp_q1 <- dplyr::bind_rows(temp_data_list_q1)
big_dat_temp_q4 <- dplyr::bind_rows(temp_data_list_q4)
big_dat_temp <- bind_rows(
  big_dat_temp_q1 %>% mutate(quarter = 1),
  big_dat_temp_q4 %>% mutate(quarter = 4)
)
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA

# Same for the yearly raster data frames: stack per quarter, then combine
big_raster_dat_temp_q1 <- dplyr::bind_rows(raster_list_q1)
big_raster_dat_temp_q4 <- dplyr::bind_rows(raster_list_q4)
big_raster_dat_temp <- bind_rows(
  big_raster_dat_temp_q1 %>% mutate(quarter = 1),
  big_raster_dat_temp_q4 %>% mutate(quarter = 4)
)
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA

# Plot the mean temperature over all raster cells per year, one panel per quarter,
# with a linear trend line. Looks like there's big inter-annual variation but a
# positive trend
big_raster_dat_temp %>%
  group_by(quarter, year) %>%
  drop_na(temp) %>%
  summarise(mean_temp = mean(temp)) %>%
  mutate(year_num = as.numeric(year)) %>%
  ggplot(aes(x = year_num, y = mean_temp)) +
  geom_point(size = 2) +
  stat_smooth(method = "lm") +
  facet_wrap(~ quarter)
#> group_by: 2 grouping variables (quarter, year)
#> drop_na (grouped): no rows removed
#> summarise: now 56 rows and 3 columns, one group variable remaining (quarter)
#> mutate (grouped): new variable 'year_num' (double) with 28 unique values and 0% NA
#> `geom_smooth()` using formula 'y ~ x'


# Now add in the new temperature column in the original data.
# Before matching, inspect the structure of the data frames involved, starting
# with the quarter 1 subset of the cpue data:
str(d_sub_temp_q1)
#> tibble [5,595 × 18] (S3: tbl_df/tbl/data.frame)
#>  $ year           : int [1:5595] 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 ...
#>  $ lat            : num [1:5595] 54.7 54.5 54.5 54.5 54.7 ...
#>  $ lon            : num [1:5595] 13 14.3 14.2 14 13.1 ...
#>  $ quarter        : int [1:5595] 1 1 1 1 1 1 1 1 1 1 ...
#>  $ Country        : chr [1:5595] "GFR" "GFR" "GFR" "GFR" ...
#>  $ Month          : int [1:5595] 2 2 2 2 2 2 2 2 2 2 ...
#>  $ haul.id        : chr [1:5595] "1993:1:GFR:SOL:H20:21:1" "1993:1:GFR:SOL:H20:22:32" "1993:1:GFR:SOL:H20:23:31" "1993:1:GFR:SOL:H20:24:30" ...
#>  $ IDx            : chr [1:5595] "1993.1.GFR.06S1.H20.21.1" "1993.1.GFR.06S1.H20.22.32" "1993.1.GFR.06S1.H20.23.31" "1993.1.GFR.06S1.H20.24.30" ...
#>  $ ices_rect      : chr [1:5595] "38G3" "38G4" "38G4" "37G3" ...
#>  $ sub_div        : chr [1:5595] "24" "24" "24" "24" ...
#>  $ length_cm      : num [1:5595] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ id_haul_stomach: chr [1:5595] NA NA NA NA ...
#>  $ species        : chr [1:5595] "cod" "cod" "cod" "cod" ...
#>  $ haul.id.size   : chr [1:5595] "1993:1:GFR:SOL:H20:21:1.0" "1993:1:GFR:SOL:H20:22:32.0" "1993:1:GFR:SOL:H20:23:31.0" "1993:1:GFR:SOL:H20:24:30.0" ...
#>  $ substrate      : chr [1:5595] "sand" "sand" "sand" "sand" ...
#>  $ depth          : num [1:5595] 9 7 8 6 13 15 11 15 10 10 ...
#>  $ id_oxy         : chr [1:5595] "1993_1_13_54.6833" "1993_1_14.2667_54.5167" "1993_1_14.15_54.5167" "1993_1_13.9833_54.4833" ...
#>  $ year_f         : Factor w/ 28 levels "1993","1994",..: 1 1 1 1 1 1 1 1 1 1 ...
# Structure of the quarter 4 subset of the cpue data
str(d_sub_temp_q4)
#> tibble [3,666 × 18] (S3: tbl_df/tbl/data.frame)
#>  $ year           : int [1:3666] 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 ...
#>  $ lat            : num [1:3666] 54.7 54.5 54.5 54.5 54.7 ...
#>  $ lon            : num [1:3666] 13.1 14.3 14.2 14 13.1 ...
#>  $ quarter        : int [1:3666] 4 4 4 4 4 4 4 4 4 4 ...
#>  $ Country        : chr [1:3666] "GFR" "GFR" "GFR" "GFR" ...
#>  $ Month          : int [1:3666] 12 11 11 11 12 12 11 11 11 11 ...
#>  $ haul.id        : chr [1:3666] "1993:4:GFR:SOL:H20:21:65" "1993:4:GFR:SOL:H20:22:43" "1993:4:GFR:SOL:H20:23:44" "1993:4:GFR:SOL:H20:24:46" ...
#>  $ IDx            : chr [1:3666] "1993.4.GFR.06S1.H20.21.65" "1993.4.GFR.06S1.H20.22.43" "1993.4.GFR.06S1.H20.23.44" "1993.4.GFR.06S1.H20.24.46" ...
#>  $ ices_rect      : chr [1:3666] "38G3" "38G4" "38G4" "37G4" ...
#>  $ sub_div        : chr [1:3666] "24" "24" "24" "24" ...
#>  $ length_cm      : num [1:3666] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ id_haul_stomach: chr [1:3666] NA NA NA NA ...
#>  $ species        : chr [1:3666] "cod" "cod" "cod" "cod" ...
#>  $ haul.id.size   : chr [1:3666] "1993:4:GFR:SOL:H20:21:65.0" "1993:4:GFR:SOL:H20:22:43.0" "1993:4:GFR:SOL:H20:23:44.0" "1993:4:GFR:SOL:H20:24:46.0" ...
#>  $ substrate      : chr [1:3666] "sand" "sand" "sand" "sand" ...
#>  $ depth          : num [1:3666] 8 7 8 7 16 17 11 18 10 10 ...
#>  $ id_oxy         : chr [1:3666] "1993_4_13.05_54.6667" "1993_4_14.3167_54.5167" "1993_4_14.2_54.5167" "1993_4_14.0333_54.4833" ...
#>  $ year_f         : Factor w/ 28 levels "1993","1994",..: 1 1 1 1 1 1 1 1 1 1 ...
str(big_dat_temp)
#> tibble [9,261 × 5] (S3: tbl_df/tbl/data.frame)
#>  $ lon    : num [1:9261] 13 14.3 14.2 14 13.1 ...
#>  $ lat    : num [1:9261] 54.7 54.5 54.5 54.5 54.7 ...
#>  $ temp   : num [1:9261] 3.36 3.2 3.15 3.14 3.33 ...
#>  $ year   : chr [1:9261] "1993" "1993" "1993" "1993" ...
#>  $ quarter: num [1:9261] 1 1 1 1 1 1 1 1 1 1 ...

# Create an ID (year_quarter_lon_lat) in both data frames for matching the
# temperature data with the cpue data
dat$id_temp <- paste(dat$year, dat$quarter, dat$lon, dat$lat, sep = "_")
big_dat_temp$id_temp <- paste(big_dat_temp$year, big_dat_temp$quarter, big_dat_temp$lon, big_dat_temp$lat, sep = "_")

# Which id's in the cpue data (dat) have no match in the temperature data?
# (All unmatched id's printed below carry quarter 2 or 3, so they are missing
# because of the quarter, not because of the location)
ids <- dat$id_temp[!dat$id_temp %in% c(big_dat_temp$id_temp)]

# Print each unmatched id once
unique(ids)
#>   [1] "1993_3_16.8935_57.3822" "1993_3_17.0851_57.4723" "1993_3_17.5683_57.4833"
#>   [4] "1993_3_17.9133_57.0566" "1993_3_17.8833_57.3383" "1993_3_18.1158_57.758" 
#>   [7] "1993_3_18.3158_57.7593" "1993_3_19.5258_57.8453" "1993_3_19.465_57.855"  
#>  [10] "1993_3_19.4568_57.9165" "1993_3_19.6068_57.9796" "1993_3_19.2133_57.3967"
#>  [13] "1993_3_18.8516_57.175"  "1993_3_18.8395_57.0797" "1993_3_18.8798_57.0675"
#>  [16] "1993_3_18.4721_56.2501" "1993_3_17.7942_56.0955" "1993_3_17.6995_55.8498"
#>  [19] "1994_3_17.9067_57.0966" "1994_3_17.9883_57.0383" "1994_3_17.625_55.4333" 
#>  [22] "1994_3_17.1167_55.925"  "1994_3_16.5283_55.6783" "1994_3_15.9783_55.4783"
#>  [25] "1994_3_16.0417_55.315"  "1994_3_14.73_55.4583"   "1994_3_14.4983_55.46"  
#>  [28] "1994_3_14.4967_55.665"  "1994_3_15.375_55.9466"  "1995_3_13.225_55.21"   
#>  [31] "1995_3_13.585_55.21"    "1995_3_13.26_54.965"    "1995_3_13.9067_55.01"  
#>  [34] "1995_3_15.4167_55.9767" "1995_3_15.2933_55.6"    "1995_3_14.5017_55.6667"
#>  [37] "1995_3_14.6167_55.5933" "1995_3_14.495_55.46"    "1995_3_14.6833_55.4567"
#>  [40] "1995_3_16.3217_55.525"  "1995_3_18.4017_55.5533" "1995_3_18.9083_56.0017"
#>  [43] "1995_3_18.4633_56.24"   "1995_3_17.775_56.0817"  "1995_3_18.875_57.0617" 
#>  [46] "1995_3_18.8617_57.175"  "1995_3_17.95_56.9933"   "1995_3_17.915_57.065"  
#>  [49] "1995_3_17.6983_55.8466" "1995_3_17.11_55.9283"   "1996_2_18.8_54.6833"   
#>  [52] "1996_2_18.75_54.7333"   "1996_2_18.6833_54.7833" "1996_2_18.7_54.7333"   
#>  [55] "1996_2_18.6_54.8"       "1996_2_18.6333_54.8333" "1996_2_18.6333_54.8833"
#>  [58] "1996_2_18.65_54.8833"   "1996_2_18.6833_54.8833" "1996_2_18.7167_54.8667"
#>  [61] "1996_2_18.7333_54.9"    "1996_2_17.4_55.4667"    "1996_2_17.4333_55.4333"
#>  [64] "1996_2_17.6167_55.4667" "1996_2_17.5833_55.4"    "1996_2_17.45_55.35"    
#>  [67] "1996_2_17.3667_55.2333" "1996_2_18.85_54.9333"   "1997_2_20.5167_56.5667"
#>  [70] "1997_2_19.6833_56.15"   "1997_2_19.2667_56.0667" "1997_2_19.5667_56.3"   
#>  [73] "1997_2_19.8_56.3833"    "1997_2_19.9667_56.4833" "1997_2_20.2333_56.5667"
#>  [76] "1997_2_20.5333_56.65"   "1997_2_20.8_56.7167"    "1997_2_20.8167_56.9167"
#>  [79] "1997_2_20.7833_56.9833" "1997_2_20.35_56.45"     "1997_2_20.2167_56.4667"
#>  [82] "1997_2_20.1333_56.3167" "1997_2_19.8_56.2333"    "1998_2_21.4_57.7167"   
#>  [85] "1998_2_20.6667_57.1333" "1998_2_20.6_57.0833"    "1998_2_20.25_56.9333"  
#>  [88] "1998_2_20.3_56.65"      "1998_2_20.2667_56.5667" "1998_2_20.3333_56.4667"
#>  [91] "1998_2_19.8_56.3833"    "1998_2_21.3667_57.7833" "1998_2_21.2667_57.8333"
#>  [94] "1998_2_21.1167_57.8333" "1998_2_21.1667_57.5667" "1998_2_21.15_57.4833"  
#>  [97] "1998_2_21.2667_57.3667" "1998_2_21.1167_57.25"   "1998_2_20.7167_57.25"  
#> [100] "1999_2_21.1_57.05"      "1999_2_21.1167_57.0333" "1999_2_20.7167_57.1833"
#> [103] "1999_2_20.7333_57.2167" "1999_2_20.7333_57.2667" "1999_2_20.55_57.1667"  
#> [106] "1999_2_20.65_57.05"     "1999_2_20.7167_57.3667" "1999_2_20.8667_57.35"  
#> [109] "1999_2_21.1167_57.25"   "1999_2_21.15_57.2333"   "1999_2_21.2667_57.35"
length(unique(ids))
#> [1] 111
length(unique(dat$id_temp))
#> [1] 9309

# Keep only the matching key and the temperature value for the merge
big_dat_sub_temp <- dplyr::select(big_dat_temp, id_temp, temp)

# Keep the first row of every key, so that each id has exactly one temp value
big_dat_sub_temp2 <- distinct(big_dat_sub_temp, id_temp, .keep_all = TRUE)
#> distinct: removed 63 rows (1%), 9,198 rows remaining

Bottom salinity

# https://data.marine.copernicus.eu/product/BALTICSEA_REANALYSIS_PHY_003_011/download?dataset=dataset-reanalysis-nemo-monthlymeans

# Open the netCDF file holding the salinity fields
# NOTE(review): no nc_close(ncin) is visible in this part of the script -
# confirm the handle is closed once all variables have been read
sal_nc_path <- "data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1668587452211.nc"
ncin <- nc_open(sal_nc_path)

# Show the variables, dimensions and global attributes of the file
print(ncin)
#> File data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1668587452211.nc (NC_FORMAT_CLASSIC):
#> 
#>      1 variables (excluding dimension variables):
#>         float sob[longitude,latitude,time]   
#>             long_name: Sea water salinity at sea floor
#>             missing_value: NaN
#>             standard_name: sea_water_salinity
#>             units: 0.001
#>             _FillValue: NaN
#>             _ChunkSizes: 1
#>              _ChunkSizes: 523
#>              _ChunkSizes: 383
#> 
#>      3 dimensions:
#>         time  Size:335
#>             axis: T
#>             long_name: Validity time
#>             standard_name: time
#>             units: days since 1950-01-01 00:00:00
#>             calendar: gregorian
#>             _ChunkSizes: 512
#>             _CoordinateAxisType: Time
#>             valid_min: 15751
#>             valid_max: 25917.5
#>         latitude  Size:187
#>             axis: Y
#>             standard_name: latitude
#>             long_name: latitude
#>             units: degrees_north
#>             _CoordinateAxisType: Lat
#>             valid_min: 53.1249580383301
#>             valid_max: 59.3248596191406
#>         longitude  Size:199
#>             standard_name: longitude
#>             long_name: longitude
#>             units: degrees_east
#>             axis: X
#>             _CoordinateAxisType: Lon
#>             valid_min: 11.1248445510864
#>             valid_max: 22.12473487854
#> 
#>     24 global attributes:
#>         references: http://www.smhi.se
#>         institution: Swedish Meterological and Hydrological Institute
#>         history: See source and creation_date attributees
#>         Conventions: CF-1.5
#>         contact: servicedesk_cmems@mercator-ocean.eu
#>         comment: Provided by SMHI as a Copernicus Marine Environment Monitoring Service production unit
#>         bullentin_type: reanalysis
#>         cmems_product_id: BALTICSEA_REANALYSIS_PHY_003_011
#>         title: CMEMS V4 Reanalysis: NEMO model 3D fields (monthly means)
#>         FROM_ORIGINAL_FILE__easternmost_longitude: 30.2357654571533
#>         FROM_ORIGINAL_FILE__northernmost_latitude: 65.8914184570312
#>         FROM_ORIGINAL_FILE__westernmost_longitude: 9.01375484466553
#>         FROM_ORIGINAL_FILE__southernmost_latitude: 48.49169921875
#>         shallowest_depth: 1.50136542320251
#>         deepest_depth: 711.059204101562
#>         source: SMHI reanalysis run NORDIC-NS2_1d_20201201_20201201
#>         file_quality_index: 1
#>         creation_date: 2021-11-09 UTC
#>         bullentin_date: 20201201
#>         start_date: 2020-12-01 UTC
#>         stop_date: 2020-12-01 UTC
#>         start_time: 00:00 UTC
#>         stop_time: 00:00 UTC
#>         _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention

# Read the longitude and latitude vectors of the grid and store their lengths
lon <- ncvar_get(nc = ncin, varid = "longitude")
nlon <- dim(lon)
head(lon)
#> [1] 11.12484 11.18040 11.23596 11.29151 11.34706 11.40262

lat <- ncvar_get(nc = ncin, varid = "latitude")
nlat <- dim(lat)
head(lat)
#> [1] 53.12496 53.15829 53.19162 53.22496 53.25829 53.29162

# Read the time axis (its units are read further down) and print all values
time <- ncvar_get(nc = ncin, varid = "time")
time
#>   [1] 15751.0 15780.5 15811.0 15841.5 15872.0 15902.5 15933.5 15964.0 15994.5
#>  [10] 16025.0 16055.5 16086.5 16116.0 16145.5 16176.0 16206.5 16237.0 16267.5
#>  [19] 16298.5 16329.0 16359.5 16390.0 16420.5 16451.5 16481.0 16510.5 16541.0
#>  [28] 16571.5 16602.0 16632.5 16663.5 16694.0 16724.5 16755.0 16785.5 16816.5
#>  [37] 16846.5 16876.5 16907.0 16937.5 16968.0 16998.5 17029.5 17060.0 17090.5
#>  [46] 17121.0 17151.5 17182.5 17212.0 17241.5 17272.0 17302.5 17333.0 17363.5
#>  [55] 17394.5 17425.0 17455.5 17486.0 17516.5 17547.5 17577.0 17606.5 17637.0
#>  [64] 17667.5 17698.0 17728.5 17759.5 17790.0 17820.5 17851.0 17881.5 17912.5
#>  [73] 17942.0 17971.5 18002.0 18032.5 18063.0 18093.5 18124.5 18155.0 18185.5
#>  [82] 18216.0 18246.5 18277.5 18307.5 18337.5 18368.0 18398.5 18429.0 18459.5
#>  [91] 18490.5 18521.0 18551.5 18582.0 18612.5 18643.5 18673.0 18702.5 18733.0
#> [100] 18763.5 18794.0 18824.5 18855.5 18886.0 18916.5 18947.0 18977.5 19008.5
#> [109] 19038.0 19067.5 19098.0 19128.5 19159.0 19189.5 19220.5 19251.0 19281.5
#> [118] 19312.0 19342.5 19373.5 19403.0 19432.5 19463.0 19493.5 19524.0 19554.5
#> [127] 19585.5 19616.0 19646.5 19677.0 19707.5 19738.5 19768.5 19798.5 19829.0
#> [136] 19859.5 19890.0 19920.5 19951.5 19982.0 20012.5 20043.0 20073.5 20104.5
#> [145] 20134.0 20163.5 20194.0 20224.5 20255.0 20285.5 20316.5 20347.0 20377.5
#> [154] 20408.0 20438.5 20469.5 20499.0 20528.5 20559.0 20589.5 20620.0 20650.5
#> [163] 20681.5 20712.0 20742.5 20773.0 20803.5 20834.5 20864.0 20893.5 20924.0
#> [172] 20954.5 20985.0 21015.5 21046.5 21077.0 21107.5 21138.0 21168.5 21199.5
#> [181] 21229.5 21259.5 21290.0 21320.5 21351.0 21381.5 21412.5 21443.0 21473.5
#> [190] 21504.0 21534.5 21565.5 21595.0 21624.5 21655.0 21685.5 21716.0 21746.5
#> [199] 21777.5 21808.0 21838.5 21869.0 21899.5 21930.5 21960.0 21989.5 22020.0
#> [208] 22050.5 22081.0 22111.5 22142.5 22173.0 22203.5 22234.0 22264.5 22295.5
#> [217] 22325.0 22354.5 22385.0 22415.5 22446.0 22476.5 22507.5 22538.0 22568.5
#> [226] 22599.0 22629.5 22660.5 22690.5 22720.5 22751.0 22781.5 22812.0 22842.5
#> [235] 22873.5 22904.0 22934.5 22965.0 22995.5 23026.5 23056.0 23085.5 23116.0
#> [244] 23146.5 23177.0 23207.5 23238.5 23269.0 23299.5 23330.0 23360.5 23391.5
#> [253] 23421.0 23450.5 23481.0 23511.5 23542.0 23572.5 23603.5 23634.0 23664.5
#> [262] 23695.0 23725.5 23756.5 23786.0 23815.5 23846.0 23876.5 23907.0 23937.5
#> [271] 23968.5 23999.0 24029.5 24060.0 24090.5 24121.5 24151.5 24181.5 24212.0
#> [280] 24242.5 24273.0 24303.5 24334.5 24365.0 24395.5 24426.0 24456.5 24487.5
#> [289] 24517.0 24546.5 24577.0 24607.5 24638.0 24668.5 24699.5 24730.0 24760.5
#> [298] 24791.0 24821.5 24852.5 24882.0 24911.5 24942.0 24972.5 25003.0 25033.5
#> [307] 25064.5 25095.0 25125.5 25156.0 25186.5 25217.5 25247.0 25276.5 25307.0
#> [316] 25337.5 25368.0 25398.5 25429.5 25460.0 25490.5 25521.0 25551.5 25582.5
#> [325] 25612.5 25642.5 25673.0 25703.5 25734.0 25764.5 25795.5 25826.0 25856.5
#> [334] 25887.0 25917.5

# Units attribute of the time axis and the number of time steps
tunits <- ncatt_get(nc = ncin, varid = "time", attname = "units")
nt <- dim(time)
nt
#> [1] 335
tunits
#> $hasatt
#> [1] TRUE
#> 
#> $value
#> [1] "days since 1950-01-01 00:00:00"

# Get Salinity: "sob" is the name of the salinity variable in the file
dname <- "sob"

# Read the full longitude x latitude x time array ...
sal_array <- ncvar_get(nc = ncin, varid = dname)

# ... together with the attributes that describe the variable
dlname <- ncatt_get(nc = ncin, varid = dname, attname = "long_name")
dunits <- ncatt_get(nc = ncin, varid = dname, attname = "units")
fillvalue <- ncatt_get(nc = ncin, varid = dname, attname = "_FillValue")

# Check the dimensions of the array
dim(sal_array)
#> [1] 199 187 335

# Get global attributes (varid = 0 addresses the file itself, not a variable)
title <- ncatt_get(nc = ncin, varid = 0, attname = "title")
institution <- ncatt_get(nc = ncin, varid = 0, attname = "institution")
datasource <- ncatt_get(nc = ncin, varid = 0, attname = "source")
references <- ncatt_get(nc = ncin, varid = 0, attname = "references")
history <- ncatt_get(nc = ncin, varid = 0, attname = "history")
Conventions <- ncatt_get(nc = ncin, varid = 0, attname = "Conventions")

# Convert time: split the time units string ("days since YYYY-MM-DD hh:mm:ss")
# into fields to recover the origin of the time axis
tustr <- strsplit(tunits$value, " ")
tdstr <- strsplit(unlist(tustr)[3], "-")
tmonth <- as.integer(unlist(tdstr)[2])
tday <- as.integer(unlist(tdstr)[3])
tyear <- as.integer(unlist(tdstr)[1])

# Here I deviate from the guide a little bit. Save this info:
dates <- chron(time, origin = c(tmonth, tday, tyear))

# Calendar month and four-digit year of every time step.
# These are computed from real dates (origin + whole days) rather than by
# cutting characters out of the printed chron string, so the result does not
# depend on chron's display format and no century has to be guessed from a
# two-digit year. floor() drops the half days that some time steps carry, and
# as.vector() drops the array dimension of the time variable.
time_origin <- as.Date(sprintf("%04d-%02d-%02d", tyear, tmonth, tday))
time_as_date <- time_origin + floor(as.vector(time))

months <- as.numeric(format(time_as_date, "%m"))
years <- as.numeric(format(time_as_date, "%Y"))

# Replace netCDF fill values with NA's.
# The file declares _FillValue as NaN. Comparing with NaN (`x == NaN`) gives NA
# for every cell, so an equality test never selects anything; NaN cells have to
# be found with is.nan(). A non-NaN fill value is still matched by equality,
# with which() dropping the cells that are already NA. Nothing is replaced
# when the variable has no _FillValue attribute at all.
if (isTRUE(fillvalue$hasatt)) {
  if (is.na(fillvalue$value)) {
    sal_array[is.nan(sal_array)] <- NA
  } else {
    sal_array[which(sal_array == fillvalue$value)] <- NA
  }
}

# Next, we need to work with the months that correspond to the quarters that we use.
# We keep only the time steps that fall in those quarters and average them within each year.
# First get the index of months that correspond to Q1 and Q4
months
#>   [1]  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2
#>  [26]  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3
#>  [51]  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4
#>  [76]  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5
#> [101]  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6
#> [126]  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7
#> [151]  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8
#> [176]  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9
#> [201] 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10
#> [226] 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11
#> [251] 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12
#> [276]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1
#> [301]  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2
#> [326]  3  4  5  6  7  8  9 10 11 12

# Positions of the time steps that belong to quarter 1 (months 1-3) and
# quarter 4 (months 10-12)
index_keep_q1 <- which(months <= 3)
index_keep_q4 <- which(months >= 10)

# Month and year labels of the time steps that are kept
months_keep_q1 <- months[index_keep_q1]
months_keep_q4 <- months[index_keep_q4]

years_keep_q1 <- years[index_keep_q1]
years_keep_q4 <- years[index_keep_q4]

# Salinity layers (third array dimension) of the time steps that are kept
sal_q1 <- sal_array[, , index_keep_q1]
sal_q4 <- sal_array[, , index_keep_q4]

# Now we have one array per quarter holding its monthly layers.
# The next step is to average the layers within each year.
# Start position of every block of three layers: 1, 4, 7, ... up to the number
# of layers (third dimension) of the array
loop_seq_q1 <- seq(from = 1, to = dim(sal_q1)[3], by = 3)
loop_seq_q4 <- seq(from = 1, to = dim(sal_q4)[3], by = 3)

# Empty lists that will hold one averaged matrix per year
dlist_q1 <- vector("list", 0)
dlist_q4 <- vector("list", 0)

# Placeholders for the monthly layers and their average (assigned in the loop)
sal_1 <- sal_2 <- sal_3 <- sal_ave_q1 <- c()
sal_10 <- sal_11 <- sal_12 <- sal_ave_q4 <- c()

# Now average by quarter. The vector loop_seq_q1 is 1, 4, 7 etc. So the first
# iteration uses layers 1, 2, 3, which is the index we want.

dim(sal_q1)
#> [1] 199 187  83
dim(sal_q4)
#> [1] 199 187  84

# Hmm, we didn't get the first month in the salinity series... repeat the first
# available layer and fill in so that every year has three layers in Q1.
# The number of missing layers is taken from the first Q1 month in the series
# (months_keep_q1[1]) instead of hard-coding the 83 layers of the current file,
# so a longer series is not silently truncated. The padding is skipped when the
# number of layers is already a multiple of three, which also makes it safe to
# re-run this chunk.
n_layers_q1 <- dim(sal_q1)[3]
n_missing_q1 <- months_keep_q1[1] - 1

if (n_missing_q1 > 0 && n_layers_q1 %% 3 != 0) {
  sal_q1 <- sal_q1[, , c(rep(1, n_missing_q1), seq_len(n_layers_q1))]
}

dim(sal_q1)
#> [1] 199 187  84

# Average the three monthly layers of each quarter within every year.
# loop_seq_q1 holds the first layer of each year (1, 4, 7, ...), so layers
# i, i + 1 and i + 2 are the three months of one quarter. The same positions
# are used for Q4, which only works when both arrays have the same number of
# layers, so check that first.
stopifnot(dim(sal_q1)[3] == dim(sal_q4)[3])

for (i in loop_seq_q1) {

  # The three monthly layers of quarter 1 ...
  sal_1 <- sal_q1[, , i]
  sal_2 <- sal_q1[, , i + 1]
  sal_3 <- sal_q1[, , i + 2]

  # ... and of quarter 4
  sal_10 <- sal_q4[, , i]
  sal_11 <- sal_q4[, , i + 1]
  sal_12 <- sal_q4[, , i + 2]

  # Cell-wise mean over the three months (a cell that is NA in any month
  # stays NA)
  sal_ave_q1 <- (sal_1 + sal_2 + sal_3) / 3
  sal_ave_q4 <- (sal_10 + sal_11 + sal_12) / 3

  # Position in the output lists, to get index 1:n(years). Integer division is
  # used because `[[` truncates a non-integer index, so a floating-point
  # result such as 2.9999999 would silently overwrite the previous year.
  list_pos_q1 <- (i + 2) %/% 3
  list_pos_q4 <- list_pos_q1

  dlist_q1[[list_pos_q1]] <- sal_ave_q1
  dlist_q4[[list_pos_q4]] <- sal_ave_q4

}

# Label every list element with the year it holds
dlist_q1 <- setNames(dlist_q1, unique(years_keep_q1))
dlist_q4 <- setNames(dlist_q4, unique(years_keep_q4))

# Next comes a loop that extracts the raster value for each year...
# The cpue data is called dat so far in this script

# Keep the quarter 1 rows of the cpue data, and of those only the years that
# have a salinity layer
d_sub_sal_q1 <- dat %>%
  filter(quarter == 1) %>%
  filter(year %in% names(dlist_q1)) %>%
  droplevels()
#> filter: removed 3,778 rows (40%), 5,595 rows remaining
#> filter: no rows removed
# Same for quarter 4: keep only the years that have a salinity layer
d_sub_sal_q4 <- dat %>%
  filter(quarter == 4) %>%
  filter(year %in% names(dlist_q4)) %>%
  droplevels()
#> filter: removed 5,707 rows (61%), 3,666 rows remaining
#> filter: no rows removed

# Empty lists that collect, per year, the cpue coordinates with their
# extracted salinity ...
sal_data_list_q1 <- vector("list", 0)
sal_data_list_q4 <- vector("list", 0)

# ... and the salinity raster converted to a data frame
raster_list_q1 <- vector("list", 0)
raster_list_q4 <- vector("list", 0)

# Year as a factor, used to drive and index the loop below
d_sub_sal_q1[["year_f"]] <- as.factor(d_sub_sal_q1[["year"]])
d_sub_sal_q4[["year_f"]] <- as.factor(d_sub_sal_q4[["year"]])

# Loop through each year and extract raster values for the cpue data points.
# The years present in the Q1 data drive the loop; the same year is then used
# to look up the Q4 salinity layer and the Q4 hauls.

# Set plot limits (the same for every year, so defined once)
ymin <- 54
ymax <- 58
xmin <- 12
xmax <- 22

# Coordinate reference system shared by all rasters. The "+" that was attached
# to the wrong token ("+no_defs+ towgs84=0,0,0") is moved to where it belongs.
sal_crs <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs +towgs84=0,0,0")

# Map of one salinity raster (rast_df: columns Longitude, Latitude, sal) with
# the haul positions (haul_df: columns lon, lat, sal) drawn on top, both
# coloured by salinity on the same scale. Returns the ggplot object.
plot_sal_map <- function(rast_df, haul_df) {
  ggplot(data = rast_df, aes(y = Latitude, x = Longitude)) +
    geom_raster(aes(fill = sal)) +
    geom_point(data = haul_df, aes(x = lon, y = lat, fill = sal),
               color = "black", size = 5, shape = 21) +
    theme_bw() +
    geom_sf(data = world, inherit.aes = FALSE, size = 0.2) +
    coord_sf(xlim = c(xmin, xmax),
             ylim = c(ymin, ymax)) +
    scale_colour_gradientn(colours = rev(terrain.colors(10)),
                           limits = c(2, 36)) +
    scale_fill_gradientn(colours = rev(terrain.colors(10)),
                         limits = c(2, 36))
}

# Iterating over a factor yields its labels as character strings, so i is the
# year as text and can be used to index the salinity lists by name
for (i in unique(d_sub_sal_q1$year_f)) {

  # Subset a year
  sal_slice_q1 <- dlist_q1[[i]]
  sal_slice_q4 <- dlist_q4[[i]]

  # Create raster for that year (i). The array is stored as longitude x
  # latitude, so it is transposed to get latitude in rows.
  # NOTE(review): the extent is set to the min/max of the lon/lat vectors; if
  # those are cell centres the extent is half a cell short on each side -
  # confirm whether that matters for the extraction
  r_q1 <- raster(t(sal_slice_q1), xmn = min(lon), xmx = max(lon),
                 ymn = min(lat), ymx = max(lat), crs = sal_crs)
  r_q4 <- raster(t(sal_slice_q4), xmn = min(lon), xmx = max(lon),
                 ymn = min(lat), ymx = max(lat), crs = sal_crs)

  # Flip...
  r_q1 <- flip(r_q1, direction = "y")
  r_q4 <- flip(r_q4, direction = "y")

  plot(r_q1, main = paste(i, "Q1"))
  plot(r_q4, main = paste(i, "Q4"))

  # Filter the same year (i) in the cpue data and select only coordinates
  d_slice_q1 <- d_sub_sal_q1 %>% filter(year_f == i) %>% dplyr::select(lon, lat)
  d_slice_q4 <- d_sub_sal_q4 %>% filter(year_f == i) %>% dplyr::select(lon, lat)

  # Make into a SpatialPoints object
  data_sp_q1 <- SpatialPoints(d_slice_q1)
  data_sp_q4 <- SpatialPoints(d_slice_q4)

  # Extract raster value (salinity)
  rasValue_q1 <- raster::extract(r_q1, data_sp_q1)
  rasValue_q4 <- raster::extract(r_q4, data_sp_q4)

  # Coordinates of the points as a plain data frame (not used further inside
  # this loop; kept because later parts of the script may refer to them)
  df_q1 <- as.data.frame(data_sp_q1)
  df_q4 <- as.data.frame(data_sp_q4)

  # Add in the raster value in the df holding the coordinates for the cpue data
  d_slice_q1$sal <- rasValue_q1
  d_slice_q4$sal <- rasValue_q4

  # Add in which year
  d_slice_q1$year <- i
  d_slice_q4$year <- i

  # Create an index for the data list where we store all years (the loop
  # variable is the year itself, so shift it to start at 1 for 1993)
  index_q1 <- as.numeric(i) - 1992
  index_q4 <- as.numeric(i) - 1992

  # Add each years' data in the list
  sal_data_list_q1[[index_q1]] <- d_slice_q1
  sal_data_list_q4[[index_q4]] <- d_slice_q4

  # Save to check each year is ok! First convert the raster to points for
  # plotting (so that we can use ggplot)
  map_q1 <- rasterToPoints(r_q1)
  map_q4 <- rasterToPoints(r_q4)

  # Make the points a dataframe for ggplot
  df_rast_q1 <- data.frame(map_q1)
  df_rast_q4 <- data.frame(map_q4)

  # Rename y-variable and add year
  df_rast_q1 <- df_rast_q1 %>% rename("sal" = "layer") %>% mutate(year = i)
  df_rast_q4 <- df_rast_q4 %>% rename("sal" = "layer") %>% mutate(year = i)

  # Add each years' raster data frame in the list
  raster_list_q1[[index_q1]] <- df_rast_q1
  raster_list_q4[[index_q4]] <- df_rast_q4

  # Make appropriate column headings for plotting. All four columns are named:
  # giving only three names would leave the year column with an NA name
  colnames(df_rast_q1) <- c("Longitude", "Latitude", "sal", "year")
  colnames(df_rast_q4) <- c("Longitude", "Latitude", "sal", "year")

  # Make and save a map for q1
  map_plot_q1 <- plot_sal_map(df_rast_q1, d_slice_q1)

  ggsave(paste0("figures/supp/cpue_sal_rasters/", i, "q1.png"),
         plot = map_plot_q1, width = 6.5, height = 6.5, dpi = 600)

  # Make and save a map for q4
  map_plot_q4 <- plot_sal_map(df_rast_q4, d_slice_q4)

  ggsave(paste0("figures/supp/cpue_sal_rasters/", i, "q4.png"),
         plot = map_plot_q4, width = 6.5, height = 6.5, dpi = 600)

}

#> filter: removed 5,494 rows (98%), 101 rows remaining
#> filter: removed 3,605 rows (98%), 61 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,420 rows (97%), 175 rows remaining
#> filter: removed 3,604 rows (98%), 62 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,441 rows (97%), 154 rows remaining
#> filter: removed 3,613 rows (99%), 53 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,412 rows (97%), 183 rows remaining
#> filter: removed 3,605 rows (98%), 61 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,381 rows (96%), 214 rows remaining
#> filter: removed 3,591 rows (98%), 75 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,367 rows (96%), 228 rows remaining
#> filter: removed 3,598 rows (98%), 68 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,405 rows (97%), 190 rows remaining
#> filter: removed 3,572 rows (97%), 94 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,485 rows (98%), 110 rows remaining
#> filter: removed 3,577 rows (98%), 89 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,387 rows (96%), 208 rows remaining
#> filter: removed 3,550 rows (97%), 116 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,450 rows (97%), 145 rows remaining
#> filter: removed 3,549 rows (97%), 117 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,434 rows (97%), 161 rows remaining
#> filter: removed 3,538 rows (97%), 128 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,388 rows (96%), 207 rows remaining
#> filter: removed 3,557 rows (97%), 109 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,377 rows (96%), 218 rows remaining
#> filter: removed 3,515 rows (96%), 151 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,407 rows (97%), 188 rows remaining
#> filter: removed 3,516 rows (96%), 150 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,360 rows (96%), 235 rows remaining
#> filter: removed 3,497 rows (95%), 169 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,373 rows (96%), 222 rows remaining
#> filter: removed 3,490 rows (95%), 176 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,336 rows (95%), 259 rows remaining
#> filter: removed 3,486 rows (95%), 180 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,355 rows (96%), 240 rows remaining
#> filter: removed 3,504 rows (96%), 162 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,352 rows (96%), 243 rows remaining
#> filter: removed 3,486 rows (95%), 180 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,371 rows (96%), 224 rows remaining
#> filter: removed 3,533 rows (96%), 133 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,327 rows (95%), 268 rows remaining
#> filter: removed 3,518 rows (96%), 148 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,387 rows (96%), 208 rows remaining
#> filter: removed 3,490 rows (95%), 176 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,358 rows (96%), 237 rows remaining
#> filter: removed 3,500 rows (95%), 166 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,363 rows (96%), 232 rows remaining
#> filter: removed 3,453 rows (94%), 213 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,350 rows (96%), 245 rows remaining
#> filter: removed 3,441 rows (94%), 225 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,339 rows (95%), 256 rows remaining
#> filter: removed 3,456 rows (94%), 210 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,477 rows (98%), 118 rows remaining
#> filter: removed 3,574 rows (97%), 92 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

#> filter: removed 5,469 rows (98%), 126 rows remaining
#> filter: removed 3,564 rows (97%), 102 rows remaining
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> rename: renamed one variable (sal)
#> mutate: new variable 'year' (character) with one unique value and 0% NA


# Stack the yearly data frames of each quarter into one data frame ...
big_dat_sal_q1 <- dplyr::bind_rows(sal_data_list_q1)
big_dat_sal_q4 <- dplyr::bind_rows(sal_data_list_q4)

# ... then label the quarter and stack both quarters
big_dat_sal <- bind_rows(
  big_dat_sal_q1 %>% mutate(quarter = 1),
  big_dat_sal_q4 %>% mutate(quarter = 4)
)
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA

# Same for the raster data frames: stack the years within each quarter,
# label the quarter and stack both quarters
big_raster_dat_sal_q1 <- dplyr::bind_rows(raster_list_q1)
big_raster_dat_sal_q4 <- dplyr::bind_rows(raster_list_q4)
big_raster_dat_sal <- bind_rows(
  big_raster_dat_sal_q1 %>% mutate(quarter = 1),
  big_raster_dat_sal_q4 %>% mutate(quarter = 4)
)
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA

# Mean salinity over all raster cells, per quarter and year
mean_sal_by_year <- big_raster_dat_sal %>%
  group_by(quarter, year) %>%
  drop_na(sal) %>%
  summarise(mean_sal = mean(sal)) %>%
  mutate(year_num = as.numeric(year))

# Plot data, looks like there's big inter-annual variation but a positive trend
ggplot(mean_sal_by_year, aes(year_num, mean_sal)) +
  geom_point(size = 2) +
  stat_smooth(method = "lm") +
  facet_wrap(~ quarter)
#> group_by: 2 grouping variables (quarter, year)
#> drop_na (grouped): no rows removed
#> summarise: now 56 rows and 3 columns, one group variable remaining (quarter)
#> mutate (grouped): new variable 'year_num' (double) with 28 unique values and 0% NA
#> `geom_smooth()` using formula 'y ~ x'


# Next step: add the new salinity column to the original data.
# First inspect the structure of the quarter 1 haul data
utils::str(d_sub_sal_q1)
#> tibble [5,595 × 19] (S3: tbl_df/tbl/data.frame)
#>  $ year           : int [1:5595] 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 ...
#>  $ lat            : num [1:5595] 54.7 54.5 54.5 54.5 54.7 ...
#>  $ lon            : num [1:5595] 13 14.3 14.2 14 13.1 ...
#>  $ quarter        : int [1:5595] 1 1 1 1 1 1 1 1 1 1 ...
#>  $ Country        : chr [1:5595] "GFR" "GFR" "GFR" "GFR" ...
#>  $ Month          : int [1:5595] 2 2 2 2 2 2 2 2 2 2 ...
#>  $ haul.id        : chr [1:5595] "1993:1:GFR:SOL:H20:21:1" "1993:1:GFR:SOL:H20:22:32" "1993:1:GFR:SOL:H20:23:31" "1993:1:GFR:SOL:H20:24:30" ...
#>  $ IDx            : chr [1:5595] "1993.1.GFR.06S1.H20.21.1" "1993.1.GFR.06S1.H20.22.32" "1993.1.GFR.06S1.H20.23.31" "1993.1.GFR.06S1.H20.24.30" ...
#>  $ ices_rect      : chr [1:5595] "38G3" "38G4" "38G4" "37G3" ...
#>  $ sub_div        : chr [1:5595] "24" "24" "24" "24" ...
#>  $ length_cm      : num [1:5595] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ id_haul_stomach: chr [1:5595] NA NA NA NA ...
#>  $ species        : chr [1:5595] "cod" "cod" "cod" "cod" ...
#>  $ haul.id.size   : chr [1:5595] "1993:1:GFR:SOL:H20:21:1.0" "1993:1:GFR:SOL:H20:22:32.0" "1993:1:GFR:SOL:H20:23:31.0" "1993:1:GFR:SOL:H20:24:30.0" ...
#>  $ substrate      : chr [1:5595] "sand" "sand" "sand" "sand" ...
#>  $ depth          : num [1:5595] 9 7 8 6 13 15 11 15 10 10 ...
#>  $ id_oxy         : chr [1:5595] "1993_1_13_54.6833" "1993_1_14.2667_54.5167" "1993_1_14.15_54.5167" "1993_1_13.9833_54.4833" ...
#>  $ id_temp        : chr [1:5595] "1993_1_13_54.6833" "1993_1_14.2667_54.5167" "1993_1_14.15_54.5167" "1993_1_13.9833_54.4833" ...
#>  $ year_f         : Factor w/ 28 levels "1993","1994",..: 1 1 1 1 1 1 1 1 1 1 ...
# Structure of the quarter 4 haul data
utils::str(d_sub_sal_q4)
#> tibble [3,666 × 19] (S3: tbl_df/tbl/data.frame)
#>  $ year           : int [1:3666] 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 ...
#>  $ lat            : num [1:3666] 54.7 54.5 54.5 54.5 54.7 ...
#>  $ lon            : num [1:3666] 13.1 14.3 14.2 14 13.1 ...
#>  $ quarter        : int [1:3666] 4 4 4 4 4 4 4 4 4 4 ...
#>  $ Country        : chr [1:3666] "GFR" "GFR" "GFR" "GFR" ...
#>  $ Month          : int [1:3666] 12 11 11 11 12 12 11 11 11 11 ...
#>  $ haul.id        : chr [1:3666] "1993:4:GFR:SOL:H20:21:65" "1993:4:GFR:SOL:H20:22:43" "1993:4:GFR:SOL:H20:23:44" "1993:4:GFR:SOL:H20:24:46" ...
#>  $ IDx            : chr [1:3666] "1993.4.GFR.06S1.H20.21.65" "1993.4.GFR.06S1.H20.22.43" "1993.4.GFR.06S1.H20.23.44" "1993.4.GFR.06S1.H20.24.46" ...
#>  $ ices_rect      : chr [1:3666] "38G3" "38G4" "38G4" "37G4" ...
#>  $ sub_div        : chr [1:3666] "24" "24" "24" "24" ...
#>  $ length_cm      : num [1:3666] 0 0 0 0 0 0 0 0 0 0 ...
#>  $ id_haul_stomach: chr [1:3666] NA NA NA NA ...
#>  $ species        : chr [1:3666] "cod" "cod" "cod" "cod" ...
#>  $ haul.id.size   : chr [1:3666] "1993:4:GFR:SOL:H20:21:65.0" "1993:4:GFR:SOL:H20:22:43.0" "1993:4:GFR:SOL:H20:23:44.0" "1993:4:GFR:SOL:H20:24:46.0" ...
#>  $ substrate      : chr [1:3666] "sand" "sand" "sand" "sand" ...
#>  $ depth          : num [1:3666] 8 7 8 7 16 17 11 18 10 10 ...
#>  $ id_oxy         : chr [1:3666] "1993_4_13.05_54.6667" "1993_4_14.3167_54.5167" "1993_4_14.2_54.5167" "1993_4_14.0333_54.4833" ...
#>  $ id_temp        : chr [1:3666] "1993_4_13.05_54.6667" "1993_4_14.3167_54.5167" "1993_4_14.2_54.5167" "1993_4_14.0333_54.4833" ...
#>  $ year_f         : Factor w/ 28 levels "1993","1994",..: 1 1 1 1 1 1 1 1 1 1 ...
# Structure of the combined (quarter 1 + quarter 4) salinity data
utils::str(big_dat_sal)
#> tibble [9,261 × 5] (S3: tbl_df/tbl/data.frame)
#>  $ lon    : num [1:9261] 13 14.3 14.2 14 13.1 ...
#>  $ lat    : num [1:9261] 54.7 54.5 54.5 54.5 54.7 ...
#>  $ sal    : num [1:9261] 12.69 8.85 8.87 9.18 12.78 ...
#>  $ year   : chr [1:9261] "1993" "1993" "1993" "1993" ...
#>  $ quarter: num [1:9261] 1 1 1 1 1 1 1 1 1 1 ...

# Create an ID (year_quarter_lon_lat) for matching the salinity data with
# the cpue data
dat$id_sal <- paste(dat$year, dat$quarter, dat$lon, dat$lat, sep = "_")
big_dat_sal$id_sal <- paste(big_dat_sal$year, big_dat_sal$quarter,
                            big_dat_sal$lon, big_dat_sal$lat, sep = "_")

# Which ids in the cpue data (dat) have no match in the salinity data?
# The unmatched ids printed below all belong to quarters 2 and 3, whereas
# the salinity data only cover quarters 1 and 4, so the mismatch is about
# the quarter and not about the location
ids <- dat$id_sal[!(dat$id_sal %in% big_dat_sal$id_sal)]

unique(ids)
#>   [1] "1993_3_16.8935_57.3822" "1993_3_17.0851_57.4723" "1993_3_17.5683_57.4833"
#>   [4] "1993_3_17.9133_57.0566" "1993_3_17.8833_57.3383" "1993_3_18.1158_57.758" 
#>   [7] "1993_3_18.3158_57.7593" "1993_3_19.5258_57.8453" "1993_3_19.465_57.855"  
#>  [10] "1993_3_19.4568_57.9165" "1993_3_19.6068_57.9796" "1993_3_19.2133_57.3967"
#>  [13] "1993_3_18.8516_57.175"  "1993_3_18.8395_57.0797" "1993_3_18.8798_57.0675"
#>  [16] "1993_3_18.4721_56.2501" "1993_3_17.7942_56.0955" "1993_3_17.6995_55.8498"
#>  [19] "1994_3_17.9067_57.0966" "1994_3_17.9883_57.0383" "1994_3_17.625_55.4333" 
#>  [22] "1994_3_17.1167_55.925"  "1994_3_16.5283_55.6783" "1994_3_15.9783_55.4783"
#>  [25] "1994_3_16.0417_55.315"  "1994_3_14.73_55.4583"   "1994_3_14.4983_55.46"  
#>  [28] "1994_3_14.4967_55.665"  "1994_3_15.375_55.9466"  "1995_3_13.225_55.21"   
#>  [31] "1995_3_13.585_55.21"    "1995_3_13.26_54.965"    "1995_3_13.9067_55.01"  
#>  [34] "1995_3_15.4167_55.9767" "1995_3_15.2933_55.6"    "1995_3_14.5017_55.6667"
#>  [37] "1995_3_14.6167_55.5933" "1995_3_14.495_55.46"    "1995_3_14.6833_55.4567"
#>  [40] "1995_3_16.3217_55.525"  "1995_3_18.4017_55.5533" "1995_3_18.9083_56.0017"
#>  [43] "1995_3_18.4633_56.24"   "1995_3_17.775_56.0817"  "1995_3_18.875_57.0617" 
#>  [46] "1995_3_18.8617_57.175"  "1995_3_17.95_56.9933"   "1995_3_17.915_57.065"  
#>  [49] "1995_3_17.6983_55.8466" "1995_3_17.11_55.9283"   "1996_2_18.8_54.6833"   
#>  [52] "1996_2_18.75_54.7333"   "1996_2_18.6833_54.7833" "1996_2_18.7_54.7333"   
#>  [55] "1996_2_18.6_54.8"       "1996_2_18.6333_54.8333" "1996_2_18.6333_54.8833"
#>  [58] "1996_2_18.65_54.8833"   "1996_2_18.6833_54.8833" "1996_2_18.7167_54.8667"
#>  [61] "1996_2_18.7333_54.9"    "1996_2_17.4_55.4667"    "1996_2_17.4333_55.4333"
#>  [64] "1996_2_17.6167_55.4667" "1996_2_17.5833_55.4"    "1996_2_17.45_55.35"    
#>  [67] "1996_2_17.3667_55.2333" "1996_2_18.85_54.9333"   "1997_2_20.5167_56.5667"
#>  [70] "1997_2_19.6833_56.15"   "1997_2_19.2667_56.0667" "1997_2_19.5667_56.3"   
#>  [73] "1997_2_19.8_56.3833"    "1997_2_19.9667_56.4833" "1997_2_20.2333_56.5667"
#>  [76] "1997_2_20.5333_56.65"   "1997_2_20.8_56.7167"    "1997_2_20.8167_56.9167"
#>  [79] "1997_2_20.7833_56.9833" "1997_2_20.35_56.45"     "1997_2_20.2167_56.4667"
#>  [82] "1997_2_20.1333_56.3167" "1997_2_19.8_56.2333"    "1998_2_21.4_57.7167"   
#>  [85] "1998_2_20.6667_57.1333" "1998_2_20.6_57.0833"    "1998_2_20.25_56.9333"  
#>  [88] "1998_2_20.3_56.65"      "1998_2_20.2667_56.5667" "1998_2_20.3333_56.4667"
#>  [91] "1998_2_19.8_56.3833"    "1998_2_21.3667_57.7833" "1998_2_21.2667_57.8333"
#>  [94] "1998_2_21.1167_57.8333" "1998_2_21.1667_57.5667" "1998_2_21.15_57.4833"  
#>  [97] "1998_2_21.2667_57.3667" "1998_2_21.1167_57.25"   "1998_2_20.7167_57.25"  
#> [100] "1999_2_21.1_57.05"      "1999_2_21.1167_57.0333" "1999_2_20.7167_57.1833"
#> [103] "1999_2_20.7333_57.2167" "1999_2_20.7333_57.2667" "1999_2_20.55_57.1667"  
#> [106] "1999_2_20.65_57.05"     "1999_2_20.7167_57.3667" "1999_2_20.8667_57.35"  
#> [109] "1999_2_21.1167_57.25"   "1999_2_21.15_57.2333"   "1999_2_21.2667_57.35"
# Number of distinct cpue ids without a salinity match
dplyr::n_distinct(ids)
#> [1] 111
# Number of distinct ids in the cpue data, for comparison
dplyr::n_distinct(dat$id_sal)
#> [1] 9309

# Keep only the matching key and the salinity value
big_dat_sub_sal <- dplyr::select(big_dat_sal, id_sal, sal)

# One salinity value per id: keep the first row of each duplicated id
big_dat_sub_sal2 <- distinct(big_dat_sub_sal, id_sal, .keep_all = TRUE)
#> distinct: removed 63 rows (1%), 9,198 rows remaining
# Combine oxygen and temperature into one table, using a shared key name
# (id_env) so the two can be joined
env_dat <- big_dat_sub_oxy2 %>%
  rename(id_env = id_oxy) %>%
  left_join(rename(big_dat_sub_temp2, id_env = id_temp))
#> rename: renamed one variable (id_env)
#> rename: renamed one variable (id_env)
#> Joining, by = "id_env"
#> left_join: added one column (temp)
#> > rows only in x 0
#> > rows only in y ( 0)
#> > matched rows 9,198
#> > =======
#> > rows total 9,198

# Add salinity to the combined environmental table via the same key
env_dat <- env_dat %>%
  left_join(big_dat_sub_sal2 %>% rename(id_env = id_sal))
#> rename: renamed one variable (id_env)
#> Joining, by = "id_env"left_join: added one column (sal)
#>            > rows only in x       0
#>            > rows only in y  (    0)
#>            > matched rows     9,198
#>            >                 =======
#>            > rows total       9,198
                     
# Drop the redundant per-variable ids (id_oxy and id_sal; the original
# `select(-id_oxy, id_sal)` lacked the minus on id_sal and so never removed
# it), use id_temp as the shared key (id_env), and attach the environmental
# covariates to each haul. Hauls without a match keep NA covariates.
dat <- dat %>%
  dplyr::select(-id_oxy, -id_sal) %>%
  rename(id_env = id_temp) %>%
  left_join(env_dat, by = "id_env")
#> rename: renamed one variable (id_env)
#> Joining, by = "id_env"left_join: added 3 columns (oxy, temp, sal)
#>            > rows only in x     112
#>            > rows only in y  (    0)
#>            > matched rows     9,261
#>            >                 =======
#>            > rows total       9,373
  
# Keep the haul key plus the covariates that are not already in dat_full
dat <- dat %>% dplyr::select(haul.id, substrate, depth, temp, oxy, sal)

# Now join these data with dat_full. The key is given explicitly so that a
# column accidentally shared by both tables can never silently become part
# of the join key
dat_full <- left_join(dat_full, dat, by = "haul.id")
#> Joining, by = "haul.id"
#> left_join: added 5 columns (substrate, depth, temp, oxy, sal)
#> > rows only in x 0
#> > rows only in y ( 0)
#> > matched rows 1,759,609
#> > ===========
#> > rows total 1,759,609

# Temperature at the haul locations in quarter 1, one panel per year
dat_full %>%
  distinct(haul.id, .keep_all = TRUE) %>% # one row per haul
  filter(quarter == 1) %>%
  ggplot(aes(x = lon, y = lat, color = temp)) +
  geom_point() +
  geom_sf(data = world, inherit.aes = FALSE, size = 0.2) +
  coord_sf(xlim = c(xmin, xmax), ylim = c(ymin, ymax)) +
  facet_wrap(~ year) +
  theme(axis.text.x = element_text(angle = 90))
#> distinct: removed 1,750,236 rows (99%), 9,373 rows remaining
#> filter: removed 3,778 rows (40%), 5,595 rows remaining


# Temperature at the haul locations in quarter 4, one panel per year
dat_full %>%
  distinct(haul.id, .keep_all = TRUE) %>% # one row per haul
  filter(quarter == 4) %>%
  ggplot(aes(x = lon, y = lat, color = temp)) +
  geom_point() +
  geom_sf(data = world, inherit.aes = FALSE, size = 0.2) +
  coord_sf(xlim = c(xmin, xmax), ylim = c(ymin, ymax)) +
  facet_wrap(~ year) +
  theme(axis.text.x = element_text(angle = 90))
#> distinct: removed 1,750,236 rows (99%), 9,373 rows remaining
#> filter: removed 5,707 rows (61%), 3,666 rows remaining


# Oxygen at the haul locations in quarter 1, one panel per year
dat_full %>%
  distinct(haul.id, .keep_all = TRUE) %>% # one row per haul
  filter(quarter == 1) %>%
  ggplot(aes(x = lon, y = lat, color = oxy)) +
  geom_point() +
  geom_sf(data = world, inherit.aes = FALSE, size = 0.2) +
  coord_sf(xlim = c(xmin, xmax), ylim = c(ymin, ymax)) +
  facet_wrap(~ year) +
  theme(axis.text.x = element_text(angle = 90))
#> distinct: removed 1,750,236 rows (99%), 9,373 rows remaining
#> filter: removed 3,778 rows (40%), 5,595 rows remaining


# Oxygen at the haul locations in quarter 4, one panel per year
dat_full %>%
  distinct(haul.id, .keep_all = TRUE) %>% # one row per haul
  filter(quarter == 4) %>%
  ggplot(aes(x = lon, y = lat, color = oxy)) +
  geom_point() +
  geom_sf(data = world, inherit.aes = FALSE, size = 0.2) +
  coord_sf(xlim = c(xmin, xmax), ylim = c(ymin, ymax)) +
  facet_wrap(~ year) +
  theme(axis.text.x = element_text(angle = 90))
#> distinct: removed 1,750,236 rows (99%), 9,373 rows remaining
#> filter: removed 5,707 rows (61%), 3,666 rows remaining


# Salinity at the haul locations in quarter 1, one panel per year
dat_full %>%
  distinct(haul.id, .keep_all = TRUE) %>% # one row per haul
  filter(quarter == 1) %>%
  ggplot(aes(x = lon, y = lat, color = sal)) +
  geom_point() +
  geom_sf(data = world, inherit.aes = FALSE, size = 0.2) +
  coord_sf(xlim = c(xmin, xmax), ylim = c(ymin, ymax)) +
  facet_wrap(~ year) +
  theme(axis.text.x = element_text(angle = 90))
#> distinct: removed 1,750,236 rows (99%), 9,373 rows remaining
#> filter: removed 3,778 rows (40%), 5,595 rows remaining


# Salinity at the haul locations in quarter 4, one panel per year
dat_full %>%
  distinct(haul.id, .keep_all = TRUE) %>% # one row per haul
  filter(quarter == 4) %>%
  ggplot(aes(x = lon, y = lat, color = sal)) +
  geom_point() +
  geom_sf(data = world, inherit.aes = FALSE, size = 0.2) +
  coord_sf(xlim = c(xmin, xmax), ylim = c(ymin, ymax)) +
  facet_wrap(~ year) +
  theme(axis.text.x = element_text(angle = 90))
#> distinct: removed 1,750,236 rows (99%), 9,373 rows remaining
#> filter: removed 5,707 rows (61%), 3,666 rows remaining



# Minimum and maximum oxygen across hauls (one row per haul, NAs dropped),
# by quarter
dat_full %>%
  distinct(haul.id, .keep_all = TRUE) %>%
  drop_na(oxy) %>%
  group_by(quarter) %>%
  summarise(min = min(oxy), max = max(oxy))
#> distinct: removed 1,750,236 rows (99%), 9,373 rows remaining
#> drop_na: removed 139 rows (1%), 9,234 rows remaining
#> group_by: one grouping variable (quarter)
#> summarise: now 2 rows and 3 columns, ungrouped
#> # A tibble: 2 × 3
#>   quarter   min   max
#>     <int> <dbl> <dbl>
#> 1       1 -2.76  9.59
#> 2       4 -3.41  8.22

# Minimum and maximum temperature across hauls (one row per haul, NAs
# dropped), by quarter
dat_full %>%
  distinct(haul.id, .keep_all = TRUE) %>%
  drop_na(temp) %>%
  group_by(quarter) %>%
  summarise(min = min(temp), max = max(temp))
#> distinct: removed 1,750,236 rows (99%), 9,373 rows remaining
#> drop_na: removed 139 rows (1%), 9,234 rows remaining
#> group_by: one grouping variable (quarter)
#> summarise: now 2 rows and 3 columns, ungrouped
#> # A tibble: 2 × 3
#>   quarter   min   max
#>     <int> <dbl> <dbl>
#> 1       1 0.627  12.1
#> 2       4 3.53   14.5

# Minimum and maximum salinity across hauls (one row per haul, NAs
# dropped), by quarter
dat_full %>%
  distinct(haul.id, .keep_all = TRUE) %>%
  drop_na(sal) %>%
  group_by(quarter) %>%
  summarise(min = min(sal), max = max(sal))
#> distinct: removed 1,750,236 rows (99%), 9,373 rows remaining
#> drop_na: removed 159 rows (2%), 9,214 rows remaining
#> group_by: one grouping variable (quarter)
#> summarise: now 2 rows and 3 columns, ungrouped
#> # A tibble: 2 × 3
#>   quarter   min   max
#>     <int> <dbl> <dbl>
#> 1       1  6.67  23.8
#> 2       4  6.52  22.9

Add UTM coords

# Convert the haul lon/lat positions to UTM coordinates (zone 33) with the
# sourced LongLatToUTM() helper
utm_coords <- LongLatToUTM(dat_full$lon, dat_full$lat, zone = 33)
#> Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
#> prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in CRS definition
# Store the UTM coordinates divided by 1000 (for computational reasons)
dat_full$X <- utm_coords$X / 1000
dat_full$Y <- utm_coords$Y / 1000

Save data

# Write the cleaned catch-by-length data to disk, without row names
write.csv(
  x = dat_full,
  file = "data/clean/catch_by_length_q1_q4.csv",
  row.names = FALSE
)

Compare cod data with Orio et al (2017)

# Stop knitting here: the rest of the document after this call is ignored
knitr::knit_exit()